# load libraries
library(foreign)
library(dplyr)
library(stringr)
library(ggplot2)
library(ggpubr)
library(reshape2)
library(ggpubr)
library(sjmisc)
library(qwraps2)
library(stargazer)
library(rms)
library(lme4)
library(table1)
library(effects)
library(sjlabelled)
library(performance)
# load data
load(paste0(path1,"pyNE_2021_2023.Rdata"))
load(paste0(path2,"pyWest_2018_2020_2022.Rdata"))
# limit variables & merge
py <- rbind(east[,c("survey.year","region","county", "munic.area", "urban", "youthreach", "school.id",
"grade", "gender3", "age.group", "ethnicity3",
"Ma_edu","home.parents2","home.parents3", "home.parents4", "home.language",
"genMH","badMH", "sharm2", "sdqtot",
"dep.appetite", "dep.lonely", "dep.cry", "dep.sleep",
"dep.sad" ,"dep.apathy" ,"dep.hopeless",
"dep.lim7", "suic.att",
"smokelife.bin", "smoke30days.bin", "eciglife.bin", "ecig30days.bin", "alclife.bin", "alc30days.bin", "cannabislife.bin", "cannabis30days.bin")],
west[,c("survey.year","region","county","munic.area", "urban", "youthreach", "school.id",
"grade", "gender3", "age.group", "ethnicity3",
"Ma_edu","home.parents2","home.parents3","home.parents4", "home.language",
"genMH","badMH", "sharm2", "sdqtot",
"dep.appetite", "dep.lonely", "dep.cry", "dep.sleep",
"dep.sad" ,"dep.apathy" ,"dep.hopeless",
"dep.lim7", "suic.att",
"smokelife.bin", "smoke30days.bin", "eciglife.bin", "ecig30days.bin", "alclife.bin", "alc30days.bin", "cannabislife.bin", "cannabis30days.bin")])
# enter urbanicity scores manually ; calculated separately (see "Urbanicity of counties barplot.keynote" and "Census data for PY regions.Rmd")
py$urbanicity <- NA
py$urbanicity[py$munic.area=="North Dublin"] <- 4.46
py$urbanicity[py$munic.area=="Cavan"] <- 2.36
py$urbanicity[py$munic.area=="Monaghan"] <- 2.44
py$urbanicity[py$munic.area=="Mayo"] <- 2.16
py$urbanicity[py$munic.area=="Roscommon"] <- 2.29
py$urbanicity[py$munic.area=="Galway"] <- 2.87
py$urbanicity[py$munic.area=="Galway_City"] <- 6.0
# make ref level 0 meaningful
py$urbanicity <- py$urbanicity-1 # now ranges from 0-5, increasing urbanicity
# limit to 15 years and up (but allow missings) [N 24,073 -> 23,068]
py <- py[is.na(py$age.group) | (py$age.group=="15" | py$age.group=="16" | py$age.group=="17 or older"), ]
py$age.group <- droplevels(py$age.group)
# keep the full dataframe here
py.full <- py
# remove Youthreach students
py.YR <- py[py$youthreach=="Yes",]
py <- py[py$youthreach=="No",]
# remove those in 3rd year (pre-Junior Cert completion)
py <- py[is.na(py$grade) | (
#py$grade=="3rd Year" |
py$grade=="4th Year" | py$grade=="5th Year"
| py$grade=="6th Year" | py$grade=="LCA/QQI"),]
py$grade <- droplevels(py$grade)
Sample N for analysis = 21436
| Overall (N=21436) |
|
|---|---|
| gender3 | |
| Male | 10623 (49.6%) |
| Female | 10243 (47.8%) |
| Other | 480 (2.2%) |
| Missing | 90 (0.4%) |
| age.group | |
| 15 | 6779 (31.6%) |
| 16 | 12619 (58.9%) |
| 17-19 | 1961 (9.1%) |
| Missing | 77 (0.4%) |
| ethnicity3 | |
| White Irish | 12909 (60.2%) |
| White non-Irish | 1937 (9.0%) |
| Black, Asian or other minority ethnicity | 1781 (8.3%) |
| Not Asked | 4722 (22.0%) |
| Missing | 87 (0.4%) |
| grade | |
| 4th Year | 15450 (72.1%) |
| 5th Year | 5352 (25.0%) |
| LCA/QQI | 221 (1.0%) |
| 6th Year | 77 (0.4%) |
| Missing | 336 (1.6%) |
| Family make-up | |
| 2 Parents | 16784 (78.3%) |
| 1 Parent | 2764 (12.9%) |
| 1 Parent +other | 980 (4.6%) |
| Other | 811 (3.8%) |
| Missing | 97 (0.5%) |
| Maternal education level | |
| College or University | 12628 (58.9%) |
| Secondary School | 4968 (23.2%) |
| Primary School | 802 (3.7%) |
| Dont know/Doesnt apply | 2891 (13.5%) |
| Missing | 147 (0.7%) |
| school.type | |
| Mainstream | 21436 (100%) |
| urbanicity | |
| Mean (SD) | 2.20 (1.25) |
| Median [Min, Max] | 1.87 [1.16, 5.00] |
Table notes:
* 82% of the 17-25 age-group are from mainstream schools, and are
therefore likely 19 or younger
* ethnicity was not recorded in the West in 2018 (20% of sample)
* ‘White Irish’ ethnicity included Irish travellers
* ‘College/University’ level education included technical college,
diploma and apprenticeship
* “another” language included Irish
* “Other” family type at home included living alone, foster care &
exchange student host family
Let’s look at how demographics changed in each survey year. Note that this also reflects regional differences (even years = West; odd years = North East)
## Warning in table1.formula(~gender3 + age.group + ethnicity3 + grade +
## home.parents4 + : Terms to the right of '|' in formula 'x' define table columns
## and are expected to be factors with meaningful labels.
| 2018 (N=4722) |
2020 (N=4828) |
2021 (N=3160) |
2022 (N=5122) |
2023 (N=3604) |
Overall (N=21436) |
|
|---|---|---|---|---|---|---|
| gender3 | ||||||
| Male | 2337 (49.5%) | 2360 (48.9%) | 1633 (51.7%) | 2505 (48.9%) | 1788 (49.6%) | 10623 (49.6%) |
| Female | 2358 (49.9%) | 2374 (49.2%) | 1377 (43.6%) | 2469 (48.2%) | 1665 (46.2%) | 10243 (47.8%) |
| Other | 0 (0%) | 77 (1.6%) | 130 (4.1%) | 139 (2.7%) | 134 (3.7%) | 480 (2.2%) |
| Missing | 27 (0.6%) | 17 (0.4%) | 20 (0.6%) | 9 (0.2%) | 17 (0.5%) | 90 (0.4%) |
| age.group | ||||||
| 15 | 1439 (30.5%) | 1341 (27.8%) | 1014 (32.1%) | 1452 (28.3%) | 1533 (42.5%) | 6779 (31.6%) |
| 16 | 3025 (64.1%) | 3094 (64.1%) | 1595 (50.5%) | 3109 (60.7%) | 1796 (49.8%) | 12619 (58.9%) |
| 17-19 | 240 (5.1%) | 378 (7.8%) | 531 (16.8%) | 560 (10.9%) | 252 (7.0%) | 1961 (9.1%) |
| Missing | 18 (0.4%) | 15 (0.3%) | 20 (0.6%) | 1 (0.0%) | 23 (0.6%) | 77 (0.4%) |
| ethnicity3 | ||||||
| White Irish | 0 (0%) | 4077 (84.4%) | 2199 (69.6%) | 3976 (77.6%) | 2657 (73.7%) | 12909 (60.2%) |
| White non-Irish | 0 (0%) | 408 (8.5%) | 595 (18.8%) | 525 (10.3%) | 409 (11.3%) | 1937 (9.0%) |
| Black, Asian or other minority ethnicity | 0 (0%) | 313 (6.5%) | 340 (10.8%) | 613 (12.0%) | 515 (14.3%) | 1781 (8.3%) |
| Not Asked | 4722 (100%) | 0 (0%) | 0 (0%) | 0 (0%) | 0 (0%) | 4722 (22.0%) |
| Missing | 0 (0%) | 30 (0.6%) | 26 (0.8%) | 8 (0.2%) | 23 (0.6%) | 87 (0.4%) |
| grade | ||||||
| 4th Year | 3288 (69.6%) | 3546 (73.4%) | 2098 (66.4%) | 3743 (73.1%) | 2775 (77.0%) | 15450 (72.1%) |
| 5th Year | 1382 (29.3%) | 1176 (24.4%) | 960 (30.4%) | 1253 (24.5%) | 581 (16.1%) | 5352 (25.0%) |
| LCA/QQI | 0 (0%) | 83 (1.7%) | 81 (2.6%) | 57 (1.1%) | 0 (0%) | 221 (1.0%) |
| 6th Year | 10 (0.2%) | 0 (0%) | 0 (0%) | 61 (1.2%) | 6 (0.2%) | 77 (0.4%) |
| Missing | 42 (0.9%) | 23 (0.5%) | 21 (0.7%) | 8 (0.2%) | 242 (6.7%) | 336 (1.6%) |
| Family make-up | ||||||
| 2 Parents | 3749 (79.4%) | 3845 (79.6%) | 2438 (77.2%) | 3973 (77.6%) | 2779 (77.1%) | 16784 (78.3%) |
| 1 Parent | 638 (13.5%) | 627 (13.0%) | 426 (13.5%) | 634 (12.4%) | 439 (12.2%) | 2764 (12.9%) |
| 1 Parent +other | 233 (4.9%) | 207 (4.3%) | 188 (5.9%) | 191 (3.7%) | 161 (4.5%) | 980 (4.6%) |
| Other | 84 (1.8%) | 126 (2.6%) | 85 (2.7%) | 312 (6.1%) | 204 (5.7%) | 811 (3.8%) |
| Missing | 18 (0.4%) | 23 (0.5%) | 23 (0.7%) | 12 (0.2%) | 21 (0.6%) | 97 (0.5%) |
| Maternal education level | ||||||
| College or University | 2415 (51.1%) | 2482 (51.4%) | 1953 (61.8%) | 3380 (66.0%) | 2398 (66.5%) | 12628 (58.9%) |
| Secondary School | 1188 (25.2%) | 1080 (22.4%) | 767 (24.3%) | 1233 (24.1%) | 700 (19.4%) | 4968 (23.2%) |
| Primary School | 337 (7.1%) | 261 (5.4%) | 49 (1.6%) | 94 (1.8%) | 61 (1.7%) | 802 (3.7%) |
| Dont know/Doesnt apply | 751 (15.9%) | 949 (19.7%) | 369 (11.7%) | 402 (7.8%) | 420 (11.7%) | 2891 (13.5%) |
| Missing | 31 (0.7%) | 56 (1.2%) | 22 (0.7%) | 13 (0.3%) | 25 (0.7%) | 147 (0.7%) |
| school.type | ||||||
| Mainstream | 4722 (100%) | 4828 (100%) | 3160 (100%) | 5122 (100%) | 3604 (100%) | 21436 (100%) |
| munic.area | ||||||
| North Dublin | 0 (0%) | 0 (0%) | 1510 (47.8%) | 0 (0%) | 1858 (51.6%) | 3368 (15.7%) |
| Cavan | 0 (0%) | 0 (0%) | 834 (26.4%) | 0 (0%) | 822 (22.8%) | 1656 (7.7%) |
| Monaghan | 0 (0%) | 0 (0%) | 816 (25.8%) | 0 (0%) | 924 (25.6%) | 1740 (8.1%) |
| Mayo | 1463 (31.0%) | 1479 (30.6%) | 0 (0%) | 1478 (28.9%) | 0 (0%) | 4420 (20.6%) |
| Roscommon | 511 (10.8%) | 629 (13.0%) | 0 (0%) | 672 (13.1%) | 0 (0%) | 1812 (8.5%) |
| Galway County | 1975 (41.8%) | 1966 (40.7%) | 0 (0%) | 2097 (40.9%) | 0 (0%) | 6038 (28.2%) |
| Galway City | 773 (16.4%) | 754 (15.6%) | 0 (0%) | 875 (17.1%) | 0 (0%) | 2402 (11.2%) |
d <- as.data.frame(table(py$school.id))
# count number of schools per county
schls <- data.frame(region=rep(unique(py$munic.area),2),
school.type = c(rep("Mainstream",7),rep("YouthReach",7)))
for (x in unique(py$munic.area)) {
for (y in unique(py$school.type)) {
schls$num[schls$region==x & schls$school.type==y] <-
length(unique(py$school.id[py$munic.area==x & py$school.type==y]))
schls$pupils[schls$region==x & schls$school.type==y] <-
length(py$school.id[py$munic.area==x & py$school.type==y])
}
}
print(schls[schls$school.type=="Mainstream",])
## region school.type num pupils
## 1 Cavan Mainstream 11 1656
## 2 Monaghan Mainstream 12 1740
## 3 North Dublin Mainstream 16 3368
## 4 Roscommon Mainstream 9 1812
## 5 Mayo Mainstream 27 4420
## 6 Galway County Mainstream 29 6038
## 7 Galway City Mainstream 10 2402
Split into 4 sections:
Q1. 5-year prevalence estimates for secondary school students in Ireland
(N~20,000).
Q2. Time trends: how have prevalence estimates changed throughout the 5
years (descriptive only).
Q3. Which sub-groups in the adolescent population have the highest
probability of reporting these outcomes?
Q4. How did prevalence estimates between COVID and non-COVID survey
periods?
county.cols <- c( "deepskyblue1","dodgerblue4", "cornflowerblue",
"tomato","firebrick4", "violetred")
munic.cols <- c( "deepskyblue1", "dodgerblue4","cornflowerblue",
"goldenrod","firebrick4", "violetred", "tomato" )
gender.colors <- c("steelblue3", "rosybrown3", "wheat3")
dvlist_nice <- c("Poor mental health", "Repetitive self-harm", "Suicide attempt")
# Descriptives table
py$selfharm.x5 <- factor(py$selfharm.x5, levels=c("No", "Yes"))
py$suic.att <- relevel(py$suic.att, "No")
table1(~ badMH.f + selfharm.x5 + suic.att | covid.period,
data=py, c(left = "Overall"),
caption = "Overall = 5-year National average")
| Overall (N=21436) |
Non-COVID years (N=13448) |
COVID-years (N=7988) |
|
|---|---|---|---|
| badMH.f | |||
| Very good/Good/Okay | 16971 (79.2%) | 10922 (81.2%) | 6049 (75.7%) |
| Bad/Very bad | 4131 (19.3%) | 2273 (16.9%) | 1858 (23.3%) |
| Missing | 334 (1.6%) | 253 (1.9%) | 81 (1.0%) |
| selfharm.x5 | |||
| No | 18159 (84.7%) | 11330 (84.3%) | 6829 (85.5%) |
| Yes | 2490 (11.6%) | 1559 (11.6%) | 931 (11.7%) |
| Missing | 787 (3.7%) | 559 (4.2%) | 228 (2.9%) |
| suic.att | |||
| No | 18863 (88.0%) | 11869 (88.3%) | 6994 (87.6%) |
| Yes | 1747 (8.1%) | 1018 (7.6%) | 729 (9.1%) |
| Missing | 826 (3.9%) | 561 (4.2%) | 265 (3.3%) |
# model-derived AVERAGE prevalence of each outcome
library(gmodels)
dvlist <- c("badMH", "selfharm.x5", "suic.att")
py$suic.att <- relevel(py$suic.att, "No")
py$suic.att <- relevel(py$suic.att, "No")
mod <- lapply(dvlist, function(x) {
glmer(eval(substitute(i ~ 1 + (1|munic.area/school.id),
list(i = as.name(x)))),
data = py,
family=binomial() )} )
# (a) what's the average rate of each outcome?
mod.avgs <- data.frame(outcome = dvlist, prob = NA)
mod.avgs.county <- c()
mod.avgs.school <- c()
for (DV in 1:3) {
mod.avgs$prob[DV]= mean( predict(mod[[DV]], type="response") )
mod.avgs$ci.lo[DV]=quantile(predict(mod[[DV]], type="response"), 0.025)
mod.avgs$ci.hi[DV]=quantile(predict(mod[[DV]], type="response"), 0.975)
mod.avgs$N_analysis[DV] = nrow(mod[[DV]]@frame)
# predict probability of outcome for each school
newdata <- unique(py[,c("munic.area", "school.id")])
newdata$pred.resp <- predict(mod[[DV]], newdata=newdata, type="response",
re.form=~(1|munic.area/school.id))
mod.avgs.school <- rbind(mod.avgs.school,
cbind(newdata, dvlist[DV]))
# average those within each county to get predicted prob on the county level
mod.avgs.county <- rbind(mod.avgs.county,
cbind(mod.avgs.school[mod.avgs.school$`dvlist[DV]`==dvlist[DV],]%>%
group_by(munic.area)%>%
summarise(mean=mean(pred.resp),
ci_lo=ci(pred.resp)[2],
ci_hi=ci(pred.resp)[3]),
dvlist[DV]) )
}
mod.avgs[,c(2:4)] <- round(mod.avgs[,c(2:4)],3)
print(mod.avgs)
## outcome prob ci.lo ci.hi N_analysis
## 1 badMH 0.196 0.119 0.296 21102
## 2 selfharm.x5 0.120 0.053 0.184 20649
## 3 suic.att 0.084 0.048 0.130 20610
# plot model-derived mean prevalence for each outcome with variation by area and school
lims = c(0.025,.325)
d_sch <- mod.avgs.school[mod.avgs.school$`dvlist[DV]`=="badMH",]
d_co <- mod.avgs.county[mod.avgs.county$`dvlist[DV]`=="badMH",]
g1<-ggplot()+
geom_hline(yintercept=mean(mod.avgs$prob[mod.avgs$outcome=="badMH"]), linetype="dashed", colour="blue")+
geom_point(data=d_sch, aes(x=reorder(munic.area,-pred.resp), y=pred.resp), position=position_jitter(width=0.15),
shape=1, alpha=0.8, colour="grey")+
geom_errorbar(data=d_co, aes(x=reorder(munic.area,-mean), y=mean, ymin=ci_lo, ymax=ci_hi),
colour="grey30", width=0.3)+
geom_point(data=d_co, aes(x=reorder(munic.area,-mean), y=mean),
colour="grey30")+
geom_text(stat="identity", aes(label= paste0("Average = ",round(mean(mod.avgs$prob[mod.avgs$outcome=="badMH"])*100,
digits=1), "%"),
x=7, y=0.32, hjust=1),
size=4, colour="blue")+
scale_y_continuous(labels = scales::percent_format(accuracy = 5L),
limits = lims,
name="Probability")+
labs(title ="Poor mental health", x="")+
theme_bw()+
theme(axis.text.x=element_text(angle=90))
d_sch <- mod.avgs.school[mod.avgs.school$`dvlist[DV]`=="selfharm.x5",]
d_co <- mod.avgs.county[mod.avgs.county$`dvlist[DV]`=="selfharm.x5",]
g2<-ggplot()+
geom_hline(yintercept=mean(mod.avgs$prob[mod.avgs$outcome=="selfharm.x5"]), linetype="dashed", colour="blue")+
geom_point(data=d_sch, aes(x=reorder(munic.area,-pred.resp), y=pred.resp), position=position_jitter(width=0.15),
shape=1, alpha=0.8, colour="grey")+
geom_errorbar(data=d_co, aes(x=reorder(munic.area,-mean), y=mean, ymin=ci_lo, ymax=ci_hi),
colour="grey30", width=0.3)+
geom_point(data=d_co, aes(x=reorder(munic.area,-mean), y=mean),
colour="grey30")+
geom_text(stat="identity", aes(label= paste0("Average = ",
round(mean(mod.avgs$prob[mod.avgs$outcome=="selfharm.x5"])*100,
digits=1), "%"),
x=7, y=0.32, hjust=1),
size=4, colour="blue")+
scale_y_continuous(labels = scales::percent_format(accuracy = 5L),
limits = lims,
name="Probability")+
labs(title ="Frequent self-harm", x="")+
theme_bw()+
theme(axis.text.x=element_text(angle=90))
d_sch <- mod.avgs.school[mod.avgs.school$`dvlist[DV]`=="suic.att",]
d_co <- mod.avgs.county[mod.avgs.county$`dvlist[DV]`=="suic.att",]
g3<-ggplot()+
geom_hline(yintercept=mean(mod.avgs$prob[mod.avgs$outcome=="suic.att"]), linetype="dashed", colour="blue")+
geom_point(data=d_sch, aes(x=reorder(munic.area,-pred.resp), y=pred.resp), position=position_jitter(width=0.15),
shape=1, alpha=0.8, colour="grey")+
geom_errorbar(data=d_co, aes(x=reorder(munic.area,-mean), y=mean, ymin=ci_lo, ymax=ci_hi),
colour="grey30", width=0.3)+
geom_point(data=d_co,aes(x=reorder(munic.area,-mean), y=mean),
colour="grey30")+
geom_text(stat="identity", aes(label= paste0("Average = ",
round(mean(mod.avgs$prob[mod.avgs$outcome=="suic.att"])*100,
digits=1), "%"),
x=7, y=0.32, hjust=1),
size=4, colour="blue")+
scale_y_continuous(labels = scales::percent_format(accuracy = 5L),
limits = lims,
name=" ")+
labs(title ="Suicide Attempt", x="")+
theme_bw()+
theme(axis.text.x=element_text(angle=90))
ggarrange(g1, g2, g3, nrow=1, ncol=3)
# plot model-derived mean prevalence for each outcome with variation by school only
lims = c(0.025,.325)
d_sch <- mod.avgs.school[mod.avgs.school$`dvlist[DV]`=="badMH",]
subj.ave <- mean(mod.avgs$prob[mod.avgs$outcome=="badMH"])
g1<-ggplot()+
geom_rect(aes(xmin=-Inf, xmax=Inf,
ymin= mod.avgs$ci.lo[mod.avgs$outcome=="badMH"], ymax= mod.avgs$ci.hi[mod.avgs$outcome=="badMH"]),
fill="black", alpha=0.1)+
geom_hline(yintercept=subj.ave, colour="blue")+
geom_point(data=d_sch, aes(x= school.id, y=pred.resp),
shape=1, alpha=0.8, colour="black")+
geom_text(stat="identity", aes(label= paste0("Sample Average = ", round(mean(mod.avgs$prob[mod.avgs$outcome=="badMH"])*100, digits=1), "%"),
x=60, y=.31), hjust=0.5, vjust=-1, size=4, colour="blue")+
scale_y_continuous(labels = scales::percent_format(accuracy = 5L),
limits = lims, name="Probability")+
labs(title ="Poor mental health", x="")+
theme_bw()+
theme(axis.text.x=element_blank(),
axis.ticks.x = element_blank(),
panel.grid.major.x=element_blank())
d_sch <- mod.avgs.school[mod.avgs.school$`dvlist[DV]`=="selfharm.x5",]
subj.ave <- mean(mod.avgs$prob[mod.avgs$outcome=="selfharm.x5"])
g2<-ggplot()+
geom_rect(aes(xmin=-Inf, xmax=Inf,
ymin= mod.avgs$ci.lo[mod.avgs$outcome=="selfharm.x5"], ymax= mod.avgs$ci.hi[mod.avgs$outcome=="selfharm.x5"]),
fill="black", alpha=0.1)+
geom_hline(yintercept=subj.ave, colour="blue")+
geom_point(data=d_sch, aes(x= school.id, y=pred.resp),
shape=1, alpha=0.8, colour="black")+
geom_text(stat="identity", aes(label= paste0("Sample Average = ", round(mean(mod.avgs$prob[mod.avgs$outcome=="selfharm.x5"])*100, digits=1), "%"),
x=60, y=.31), hjust=0.5, vjust=-1, size=4, colour="blue")+
scale_y_continuous(labels = scales::percent_format(accuracy = 5L),
limits = lims, name=" ")+
labs(title ="Repetitive Self-Harm", x="")+
theme_bw()+
theme(axis.text.x=element_blank(),
axis.ticks.x = element_blank(),
panel.grid.major.x=element_blank())
d_sch <- mod.avgs.school[mod.avgs.school$`dvlist[DV]`=="suic.att",]
subj.ave <- mean(mod.avgs$prob[mod.avgs$outcome=="suic.att"])
g3<-ggplot()+
geom_rect(aes(xmin=-Inf, xmax=Inf,
ymin= mod.avgs$ci.lo[mod.avgs$outcome=="suic.att"], ymax= mod.avgs$ci.hi[mod.avgs$outcome=="suic.att"]),
fill="black", alpha=0.1)+
geom_hline(yintercept=subj.ave, colour="blue")+
geom_point(data=d_sch, aes(x= school.id, y=pred.resp),
shape=1, alpha=0.8, colour="black")+
geom_text(stat="identity", aes(label= paste0("Sample Average = ", round(mean(mod.avgs$prob[mod.avgs$outcome=="suic.att"])*100, digits=1), "%"),
x=60, y=.31), hjust=0.5, vjust=-1, size=4, colour="blue")+
scale_y_continuous(labels = scales::percent_format(accuracy = 5L),
limits = lims, name=" ")+
labs(title ="Suicide Attempt", x="")+
theme_bw()+
theme(axis.text.x=element_blank(),
axis.ticks.x = element_blank(),
panel.grid.major.x=element_blank())
ggarrange(g1, g2, g3, nrow=1, ncol=3)
lims = c(.05,.325)
# badmh
d<-py[ (!is.na(py$badMH)) ,] %>%
group_by(survey.year,munic.area, badMH) %>%
summarise(n=n()) %>%
mutate(freq=n/sum(n),
totn=sum(n))
d <- d[d$badMH==1,]
for (x in 1:length(d$survey.year)){
d$ci.low[x] <- binom.test(d$n[x], d$totn[x])[["conf.int"]][1]
d$ci.hi[x] <- binom.test(d$n[x], d$totn[x])[["conf.int"]][2]
}
g1<-ggplot(data=d)+
geom_rect(aes(xmin=2019.75, xmax=2021.25,
ymin= -Inf, ymax= Inf), fill="grey92", alpha=0.1)+
geom_ribbon(aes(x=survey.year, ymin=ci.low, ymax=ci.hi, fill=munic.area), alpha=0.1)+
geom_point(aes(x=survey.year, y=freq, colour=munic.area),size=1.7)+
geom_line(aes(x=survey.year, y=freq, colour=munic.area), size=1, alpha=0.7)+
labs(title="Poor mental health",
x="")+
scale_y_continuous(labels = scales::percent_format(accuracy = 5L),
breaks=c(.05, .10, .15, .20, .25, .30),
minor_breaks = NULL,
limits = lims,
name="")+
scale_colour_manual(values=munic.cols)+
scale_fill_manual(values=munic.cols)+
theme_bw()+
theme(panel.grid.minor.x=element_blank(),
axis.text.x = element_text(angle=90))
# frequent self-harm
d<-py[ (!is.na(py$selfharm.x5)) ,] %>%
group_by(survey.year,munic.area, selfharm.x5) %>%
summarise(n=n()) %>%
mutate(freq=n/sum(n),
totn=sum(n))
d <- d[d$selfharm.x5=="Yes",]
for (x in 1:length(d$survey.year)){
d$ci.low[x] <- binom.test(d$n[x], d$totn[x])[["conf.int"]][1]
d$ci.hi[x] <- binom.test(d$n[x], d$totn[x])[["conf.int"]][2]
}
g2<-ggplot(data=d)+
#geom_smooth(aes(x=survey.year, y=freq), method="loess", colour="grey85", alpha=0.2)+
geom_rect(aes(xmin=2019.75, xmax=2021.25,
ymin= -Inf, ymax= Inf), fill="grey92", alpha=0.1)+
geom_ribbon(aes(x=survey.year, ymin=ci.low, ymax=ci.hi, fill=munic.area), alpha=0.1)+
geom_point(aes(x=survey.year, y=freq, colour=munic.area),size=1.7)+
geom_line(aes(x=survey.year, y=freq, colour=munic.area), size=1, alpha=0.7)+
labs(title="Repetitive self-harm",
x="")+
scale_y_continuous(labels = scales::percent_format(accuracy = 5L),
breaks=c(.05, .10, .15, .20, .25, .30),
minor_breaks = NULL,
limits = lims,name="")+
scale_colour_manual(values=munic.cols)+
scale_fill_manual(values=munic.cols)+
theme_bw()+
theme(panel.grid.minor.x=element_blank(),
axis.text.x = element_text(angle=90))
# suicide attempt
d<-py[ (!is.na(py$suic.att)) ,] %>%
group_by(survey.year,munic.area, suic.att) %>%
summarise(n=n()) %>%
mutate(freq=n/sum(n),
totn=sum(n))
d <- d[d$suic.att=="Yes",]
for (x in 1:length(d$survey.year)){
d$ci.low[x] <- binom.test(d$n[x], d$totn[x])[["conf.int"]][1]
d$ci.hi[x] <- binom.test(d$n[x], d$totn[x])[["conf.int"]][2]
}
g3<-ggplot(data=d)+
#geom_smooth(aes(x=survey.year, y=freq), method="loess", colour="grey85", alpha=0.2)+
geom_rect(aes(xmin=2019.75, xmax=2021.25,
ymin= -Inf, ymax= Inf), fill="grey92", alpha=0.1)+
geom_ribbon(aes(x=survey.year, ymin=ci.low, ymax=ci.hi, fill=munic.area), alpha=0.1)+
geom_point(aes(x=survey.year, y=freq, colour=munic.area),size=1.7)+
geom_line(aes(x=survey.year, y=freq, colour=munic.area), size=1, alpha=0.7)+
labs(title="Suicide attempt",
x="")+
scale_y_continuous(labels = scales::percent_format(accuracy=5),
breaks=c(.05, .10, .15, .20, .25, .30),
minor_breaks = NULL,
limits = lims, name="")+
scale_colour_manual(values=munic.cols)+
scale_fill_manual(values=munic.cols)+
theme_bw()+
theme(panel.grid.minor.x=element_blank(),
axis.text.x = element_text(angle=90))
g<-ggarrange(g1,g2,g3, ncol=3,nrow=1, common.legend = T, legend="right")
annotate_figure(g, top=text_grob("Change in rates of mental health outcomes over time (secondary school students only)\n"))
lims = c(.05,.80)
# badmh
d<-py[ (!is.na(py$badMH)) ,] %>%
group_by(survey.year,gender3, badMH) %>%
summarise(n=n()) %>%
mutate(freq=n/sum(n))
d <- d[d$badMH==1 & !is.na(d$gender3),]
g1<-ggplot(data=d)+
#geom_smooth(aes(x=survey.year, y=freq), method="loess", colour="grey85", alpha=0.2)+
geom_point(aes(x=survey.year, y=freq, colour=gender3),size=1.7)+
geom_line(aes(x=survey.year, y=freq, colour=gender3), size=1, alpha=0.7)+
labs(title="Poor mental health",
x="Year of survey")+
scale_y_continuous(labels = scales::percent_format(accuracy = 5L),
limits = lims,
name="")+
scale_colour_manual(values=gender.colors)+
theme_bw()+
theme(panel.grid.minor.x=element_blank(),
axis.text.x = element_text(angle=90))
# frequent self-harm
d<-py[ (!is.na(py$selfharm.x5)) ,] %>%
group_by(survey.year,gender3, selfharm.x5) %>%
summarise(n=n()) %>%
mutate(freq=n/sum(n))
d <- d[d$selfharm.x5=="Yes" & !is.na(d$gender3),]
g2<-ggplot(data=d)+
geom_point(aes(x=survey.year, y=freq, colour=gender3),size=1.7)+
geom_line(aes(x=survey.year, y=freq, colour=gender3), size=1, alpha=0.7)+
labs(title="Frequent self-harm",
x="Year of survey")+
scale_y_continuous(labels = scales::percent_format(accuracy = 5L),
limits = lims,name="")+
scale_colour_manual(values=gender.colors)+
theme_bw()+
theme(panel.grid.minor.x=element_blank(),
axis.text.x = element_text(angle=90))
# suicide attempt
d<-py[ (!is.na(py$suic.att)) ,] %>%
group_by(survey.year,gender3, suic.att) %>%
summarise(n=n()) %>%
mutate(freq=n/sum(n))
d <- d[d$suic.att=="Yes" & !is.na(d$gender3),]
g3<-ggplot(data=d)+
geom_point(aes(x=survey.year, y=freq, colour=gender3),size=1.7)+
geom_line(aes(x=survey.year, y=freq, colour=gender3), size=1, alpha=0.7)+
labs(title="Lifetime suicide attempt",
x="Year of survey")+
scale_y_continuous(labels = scales::percent_format(accuracy = 5L),
limits = lims,name="")+
scale_colour_manual(values=gender.colors)+
theme_bw()+
theme(panel.grid.minor.x=element_blank(),
axis.text.x = element_text(angle=90))
ggarrange(g1,g2,g3, ncol=3,nrow=1, common.legend = T, legend="right")
# the above plot conflates survey year with region (the peaks may reflect the region that only took part that year)
# so, merge certain years together so that each point reflects all regions.
py$survey.period <- factor(py$survey.year, levels=c(2018, 2020, 2021, 2022, 2023),
labels=c("2018", "2020-21","2020-21", "2022-23", "2022-23"))
lims = c(.05,.70)
# badmh
d<-py[ (!is.na(py$badMH) & py$survey.year>2018) ,] %>%
group_by(survey.period,gender3, badMH) %>%
summarise(n=n()) %>%
mutate(freq=n/sum(n))
d <- d[d$badMH==1 & !is.na(d$gender3),]
g1<-ggplot(data=d)+
#geom_smooth(aes(x=survey.period, y=freq), method="loess", colour="grey85", alpha=0.2)+
geom_point(aes(x= survey.period, y=freq, group=gender3,colour=gender3),size=1.7)+
geom_line(aes(x=survey.period, y=freq, group=gender3,colour=gender3), size=1, alpha=0.7)+
labs(title="Poor mental health",
x="")+
scale_y_continuous(labels = scales::percent_format(accuracy = 5L),
limits = lims,
name="")+
scale_colour_manual(values=gender.colors)+
theme_bw()+
theme(panel.grid.minor.x=element_blank(),
axis.text.x = element_text(size=10))
# frequent self-harm
d<-py[ (!is.na(py$selfharm.x5))& py$survey.year>2018 ,] %>%
group_by(survey.period,gender3, selfharm.x5) %>%
summarise(n=n()) %>%
mutate(freq=n/sum(n))
d <- d[d$selfharm.x5=="Yes" & !is.na(d$gender3),]
g2<-ggplot(data=d)+
#geom_smooth(aes(x=survey.period, y=freq), method="loess", colour="grey85", alpha=0.2)+
geom_point(aes(x=survey.period, y=freq, group=gender3,colour=gender3),size=1.7)+
geom_line(aes(x=survey.period, y=freq, group=gender3,colour=gender3), size=1, alpha=0.7)+
labs(title="Repetitive self-harm",
x="")+
scale_y_continuous(labels = scales::percent_format(accuracy = 5L),
limits = lims,name="")+
scale_colour_manual(values=gender.colors)+
theme_bw()+
theme(panel.grid.minor.x=element_blank(),
axis.text.x = element_text(size=10))
# suicide attempt
d<-py[ (!is.na(py$suic.att))& py$survey.year>2018 ,] %>%
group_by(survey.period,gender3, suic.att) %>%
summarise(n=n()) %>%
mutate(freq=n/sum(n))
d <- d[d$suic.att=="Yes" & !is.na(d$gender3),]
g3<-ggplot(data=d)+
#geom_smooth(aes(x=survey.period, y=freq), method="loess", colour="grey85", alpha=0.2)+
geom_point(aes(x=survey.period, y=freq, group=gender3, colour=gender3),size=1.7)+
geom_line(aes(x=survey.period, y=freq,group=gender3, colour=gender3), size=1, alpha=0.7)+
labs(title="Lifetime suicide attempt",
x="")+
scale_y_continuous(labels = scales::percent_format(accuracy = 5L),
limits = lims,name="")+
scale_colour_manual(values=gender.colors)+
theme_bw()+
theme(panel.grid.minor.x=element_blank(),
axis.text.x = element_text(size=10))
ggarrange(g1,g2,g3, ncol=3,nrow=1, common.legend = T, legend="right")
# set reference levels
py$age.group <- relevel(py$age.group, "16")
py$region <- relevel(py$region, "West")
py$selfharm.x5 <- factor(py$selfharm.x5, levels=c("No", "Yes"))
py$suic.att <- relevel(py$suic.att, "No")
# create a linear version of time with baseline (0) at 2018
py$linear.time <- py$survey.year-2018
py$suic.att <- relevel(py$suic.att, "No")
dvlist <- c("badMH", "selfharm.x5", "suic.att")
py$urbanicity.z <- scale(py$urbanicity,scale=F)
groupdiffs <- lapply(dvlist, function(x) {
glmer(eval(substitute(i ~ age.group + gender3 + urbanicity.z +
+ home.parents3 + Ma_edu + covid.period
+ (1|munic.area/school.id),
list(i = as.name(x)))),
data = py,
family=binomial() )} )
# Extract Odds ratios for a table
group.ORs <- c()
for (DV in 1:3){
res1out <- as.data.frame(summary(groupdiffs[[DV]])[["coefficients"]])
res1out$var <- rownames(res1out)
res1out$outcome <- dvlist[DV]
res1out$OR <- exp(res1out$Estimate)
res1out$CI.low<-exp(res1out$Estimate - (1.96*res1out$`Std. Error`))
res1out$CI.hi<-exp(res1out$Estimate + (1.96*res1out$`Std. Error`))
rownames(res1out) <- c()
res1out <- move_columns(res1out, c(outcome, var), .before="Estimate")
res1out[,3:length(res1out)] <- round(res1out[,3:length(res1out)], 3)
group.ORs <- rbind(group.ORs, res1out)
}
group.ORs.lim <- group.ORs[,c("outcome", "var", "OR", "CI.low", "CI.hi", "Pr(>|z|)")]
group.ORs.lim$str <- paste0(round(group.ORs.lim$OR,2), " (",
round(group.ORs.lim$CI.low,2), "-" ,
round(group.ORs.lim$CI.hi,2), ")")
print(group.ORs.lim[,c(1:6)])
## outcome var OR CI.low CI.hi Pr(>|z|)
## 1 badMH (Intercept) 0.108 0.093 0.126 0.000
## 2 badMH age.group15 1.026 0.949 1.110 0.520
## 3 badMH age.group17-19 1.085 0.956 1.231 0.206
## 4 badMH gender3Female 2.155 1.993 2.331 0.000
## 5 badMH gender3Other 7.409 6.080 9.028 0.000
## 6 badMH urbanicity.z 1.071 0.967 1.187 0.187
## 7 badMH home.parents3One Parent 1.703 1.562 1.857 0.000
## 8 badMH home.parents3Other 1.036 0.856 1.255 0.714
## 9 badMH Ma_eduSecondary School 1.206 1.107 1.314 0.000
## 10 badMH Ma_eduPrimary School 1.577 1.325 1.877 0.000
## 11 badMH Ma_eduDont know/Doesnt apply 1.083 0.970 1.208 0.157
## 12 badMH covid.periodCOVID-years 1.440 1.338 1.549 0.000
## 13 selfharm.x5 (Intercept) 0.054 0.045 0.064 0.000
## 14 selfharm.x5 age.group15 1.005 0.912 1.107 0.925
## 15 selfharm.x5 age.group17-19 1.174 1.008 1.368 0.040
## 16 selfharm.x5 gender3Female 3.025 2.730 3.352 0.000
## 17 selfharm.x5 gender3Other 12.812 10.387 15.804 0.000
## 18 selfharm.x5 urbanicity.z 1.090 0.975 1.219 0.131
## 19 selfharm.x5 home.parents3One Parent 1.844 1.662 2.044 0.000
## 20 selfharm.x5 home.parents3Other 1.369 1.103 1.700 0.004
## 21 selfharm.x5 Ma_eduSecondary School 1.082 0.973 1.203 0.146
## 22 selfharm.x5 Ma_eduPrimary School 1.664 1.359 2.038 0.000
## 23 selfharm.x5 Ma_eduDont know/Doesnt apply 0.970 0.843 1.118 0.677
## 24 selfharm.x5 covid.periodCOVID-years 0.946 0.862 1.037 0.236
## 25 suic.att (Intercept) 0.041 0.036 0.047 0.000
## 26 suic.att age.group15 0.919 0.821 1.029 0.143
## 27 suic.att age.group17-19 1.287 1.088 1.522 0.003
## 28 suic.att gender3Female 1.967 1.756 2.202 0.000
## 29 suic.att gender3Other 5.387 4.252 6.826 0.000
## 30 suic.att urbanicity.z 1.088 1.023 1.158 0.007
## 31 suic.att home.parents3One Parent 1.964 1.750 2.205 0.000
## 32 suic.att home.parents3Other 1.767 1.398 2.233 0.000
## 33 suic.att Ma_eduSecondary School 1.278 1.134 1.441 0.000
## 34 suic.att Ma_eduPrimary School 1.771 1.416 2.215 0.000
## 35 suic.att Ma_eduDont know/Doesnt apply 1.158 0.993 1.351 0.062
## 36 suic.att covid.periodCOVID-years 1.190 1.072 1.320 0.001
# plot predicted probabilities of each outcome for each group
upper=0.6
for (DV in c(1:3)) {
mass.prob <- c()
var.n =0
for (x in c("gender3", "age.group", "urbanicity.z", "Ma_edu", "home.parents3")) {
# not included in plots: covid.period, or mat ed not known, or intermediary urbanicity
var.n = var.n+1
# select just the highest & lowest levels of urbanicity (1 and 5) for visualisation
if (x=="urbanicity.z"){
pred.prob <- data.frame(effect(x, groupdiffs[[DV]],
se=TRUE, confidence.level = .95, type="response",
xlevels= list(urbanicity.z = c(-1, 2.7))))
} else {
pred.prob <- data.frame(effect(x, groupdiffs[[DV]],
se=TRUE, confidence.level = .95, type="response"))
}
pred.prob$var <- x # variable name e.g. gender
names(pred.prob)[1] <- "group"
pred.prob$group <- as.character(pred.prob$group)
pred.prob <- move_columns(pred.prob, "var", .before=1)
pred.prob$group.full <- paste(pred.prob$var, pred.prob$group, sep="-") # combine var & level strings
mass.prob <- rbind(mass.prob, pred.prob)
}
mass.prob$order <- seq(1, length(mass.prob$group))
mass.prob$group[mass.prob$group=="Other" & mass.prob$var=="gender3"] <- "TGD"
mass.prob$group <- factor(mass.prob$group, levels=c("Male" , "Female" , "TGD",
"15","16" ,"17-19",
"College or University","Secondary School","Primary School","Dont know/Doesnt apply",
"Two Parents", "One Parent" , "Other",
"-1" ,"2.7"),
labels=c("Male" , "Female" , "Trans/gender-diverse",
"Aged 15","Aged 16" ,"Aged 17-19",
"Mother 3rd level edu","Mother secondary edu","Mother primary edu","Unknown mother edu",
"Two Parents", "One Parent" , "Other family make-up",
"Most rural" ,"Most urban"))
mass.prob$var <- factor(mass.prob$var, levels=c("gender3" , "age.group" ,"Ma_edu", "home.parents3","urbanicity.z"),
labels=c("Gender identity" , "Age" ,"Maternal education", "Family make-up","Urbanicity"))
# hide "unknown maternal education" from graph for simplicity
mass.prob <- mass.prob[!str_detect(mass.prob$group, "Unknown mother edu"),]
g<-ggplot(mass.prob, aes(x=group, y=fit, ymin=lower, ymax=upper, group=var, fill=var))+
geom_hline(yintercept=mod.avgs$prob[DV], linetype="dashed", size=0.7)+
geom_col(alpha=0.6)+
labs(title=dvlist_nice[DV])+
scale_y_continuous(labels = scales::percent_format(accuracy = 5L), limits = c(0,upper),
breaks=seq(0,upper,0.10), minor_breaks = NULL,
name="")+
geom_text( stat="identity", aes(label= paste0(round(fit*100, digits=0), "%"),y=0.02, colour=var),
size=3, fontface="bold")+ #vjust=-1.2,
geom_text(aes(label= paste0("- - Sample Average (",round(mod.avgs$prob[DV]*100, digits=1), "%)"),
x=14.1, y=0.52), hjust=1.0, vjust=0, size=4.5, colour="black")+
geom_errorbar(aes(colour=var),width=0)+
theme_bw()+
scale_fill_manual(values=c( "steelblue4", "lightblue3", "darkseagreen3", "darkseagreen4","navajowhite3"))+ #"darkorange4"
scale_colour_manual(values=c( "steelblue4", "lightblue3", "darkseagreen3", "darkseagreen4", "navajowhite3"))+
theme(axis.text.x =element_text(angle=30, hjust=1, vjust=1),
axis.title.y=element_blank(),
axis.title.x=element_blank(),
legend.title=element_blank(),
plot.title = element_text(face="bold", size=14, hjust=0.5))
print(g)
}
Note switch of dataframe from py to py.full
# descriptive table showing differences in demographics (and the need to control for them)
table1(~ gender3 + age.group + ethnicity3 + grade
+ home.parents4 + Ma_edu # + home.language
+ school.type + munic.area | school.type,
data=py.full)
| Mainstream (N=22474) |
YouthReach (N=600) |
Overall (N=23074) |
|
|---|---|---|---|
| gender3 | |||
| Male | 11148 (49.6%) | 289 (48.2%) | 11437 (49.6%) |
| Female | 10705 (47.6%) | 289 (48.2%) | 10994 (47.6%) |
| Other | 531 (2.4%) | 20 (3.3%) | 551 (2.4%) |
| Missing | 90 (0.4%) | 2 (0.3%) | 92 (0.4%) |
| age.group | |||
| 15 | 7778 (34.6%) | 30 (5.0%) | 7808 (33.8%) |
| 16 | 12647 (56.3%) | 141 (23.5%) | 12788 (55.4%) |
| 17 or older | 1972 (8.8%) | 416 (69.3%) | 2388 (10.3%) |
| Missing | 77 (0.3%) | 13 (2.2%) | 90 (0.4%) |
| ethnicity3 | |||
| White Irish | 13598 (60.5%) | 349 (58.2%) | 13947 (60.4%) |
| White non-Irish | 2097 (9.3%) | 64 (10.7%) | 2161 (9.4%) |
| Black, Asian or other minority ethnicity | 1966 (8.7%) | 66 (11.0%) | 2032 (8.8%) |
| Not Asked | 4726 (21.0%) | 117 (19.5%) | 4843 (21.0%) |
| Missing | 87 (0.4%) | 4 (0.7%) | 91 (0.4%) |
| grade | |||
| 2nd Year | 7 (0.0%) | 1 (0.2%) | 8 (0.0%) |
| 3rd Year | 1031 (4.6%) | 11 (1.8%) | 1042 (4.5%) |
| 4th Year | 15450 (68.7%) | 29 (4.8%) | 15479 (67.1%) |
| 5th Year | 5352 (23.8%) | 67 (11.2%) | 5419 (23.5%) |
| LCA/QQI | 221 (1.0%) | 337 (56.2%) | 558 (2.4%) |
| 6th Year | 77 (0.3%) | 63 (10.5%) | 140 (0.6%) |
| Missing | 336 (1.5%) | 92 (15.3%) | 428 (1.9%) |
| home.parents4 | |||
| 2 Parents | 17594 (78.3%) | 306 (51.0%) | 17900 (77.6%) |
| 1 Parent | 2915 (13.0%) | 174 (29.0%) | 3089 (13.4%) |
| 1 Parent +other | 1040 (4.6%) | 44 (7.3%) | 1084 (4.7%) |
| Other | 826 (3.7%) | 68 (11.3%) | 894 (3.9%) |
| Missing | 99 (0.4%) | 8 (1.3%) | 107 (0.5%) |
| Ma_edu | |||
| College or University | 13296 (59.2%) | 123 (20.5%) | 13419 (58.2%) |
| Secondary School | 5167 (23.0%) | 175 (29.2%) | 5342 (23.2%) |
| Primary School | 825 (3.7%) | 92 (15.3%) | 917 (4.0%) |
| Dont know/Doesnt apply | 3037 (13.5%) | 198 (33.0%) | 3235 (14.0%) |
| Missing | 149 (0.7%) | 12 (2.0%) | 161 (0.7%) |
| school.type | |||
| Mainstream | 22474 (100%) | 0 (0%) | 22474 (97.4%) |
| YouthReach | 0 (0%) | 600 (100%) | 600 (2.6%) |
| munic.area | |||
| North Dublin | 4402 (19.6%) | 18 (3.0%) | 4420 (19.2%) |
| Cavan | 1656 (7.4%) | 99 (16.5%) | 1755 (7.6%) |
| Monaghan | 1740 (7.7%) | 52 (8.7%) | 1792 (7.8%) |
| Mayo | 4420 (19.7%) | 125 (20.8%) | 4545 (19.7%) |
| Roscommon | 1812 (8.1%) | 115 (19.2%) | 1927 (8.4%) |
| Galway | 6039 (26.9%) | 133 (22.2%) | 6172 (26.7%) |
| Galway_City | 2405 (10.7%) | 58 (9.7%) | 2463 (10.7%) |
# adjusted comparison of secondary school and early school leavers
py.full$suic.att <- relevel(py.full$suic.att, "No")
dvlist <- c("badMH", "selfharm.x5", "suic.att")
py.full$urbanicity.z <- scale(py.full$urbanicity,scale=F)
fullsamp <- lapply(dvlist, function(x) {
glmer(eval(substitute(i ~ school.type + age.group + gender3 + urbanicity.z +
+ home.parents3 + Ma_edu + covid.period
+ (1|munic.area/school.id),
list(i = as.name(x)))),
data = py.full,
family=binomial() )} )
# plot predicted probabilities
upper=0.6
mass.prob <- c()
for (DV in c(1:3)) {
pred.prob <- data.frame(effect("school.type", fullsamp[[DV]],
se=TRUE, confidence.level = .95, type="response"))
pred.prob$var <- dvlist_nice[DV] # variable name e.g. gender
mass.prob <- rbind(mass.prob, pred.prob)
g<- ggplot(pred.prob, aes(x=school.type, y=fit, ymin=lower, ymax=upper, fill=school.type))+
geom_col(fill="grey40", alpha=0.4)+
labs(title=dvlist_nice[DV])+
scale_y_continuous(labels = scales::percent_format(accuracy = 5L), limits = c(0,upper),
breaks=seq(0,upper,0.10), minor_breaks = NULL,
name="")+
geom_text( stat="identity", aes(label= paste0(round(fit*100, digits=0), "%"),y=0.02,),
size=3, colour="grey40", fontface="bold")+ #vjust=-1.2,
geom_errorbar(width=0, colour="grey40")+
theme_bw()+
theme(axis.text.x =element_text(angle=30, hjust=1, vjust=1),
axis.title.y=element_blank(),
axis.title.x=element_blank(),
plot.title = element_text(face="bold", size=14, hjust=0.5))
print(g)
}
# Extract Odds ratios for a table
group.ORs <- c()
for (DV in 1:3){
res1out <- as.data.frame(summary(fullsamp[[DV]])[["coefficients"]])
res1out$var <- rownames(res1out)
res1out$outcome <- dvlist[DV]
res1out$OR <- exp(res1out$Estimate)
res1out$CI.low<-exp(res1out$Estimate - (1.96*res1out$`Std. Error`))
res1out$CI.hi<-exp(res1out$Estimate + (1.96*res1out$`Std. Error`))
rownames(res1out) <- c()
res1out <- move_columns(res1out, c(outcome, var), .before="Estimate")
res1out[,3:length(res1out)] <- round(res1out[,3:length(res1out)], 3)
group.ORs <- rbind(group.ORs, res1out)
}
print(group.ORs[,c("outcome", "var", "OR", "CI.low", "CI.hi", "Pr(>|z|)")])
## outcome var OR CI.low CI.hi Pr(>|z|)
## 1 badMH (Intercept) 0.112 0.095 0.133 0.000
## 2 badMH school.typeYouthReach 1.557 1.210 2.004 0.001
## 3 badMH age.group16 0.973 0.902 1.049 0.470
## 4 badMH age.group17 or older 1.051 0.927 1.192 0.434
## 5 badMH gender3Female 2.198 2.041 2.367 0.000
## 6 badMH gender3Other 7.359 6.114 8.859 0.000
## 7 badMH urbanicity.z 1.077 0.964 1.204 0.188
## 8 badMH home.parents3One Parent 1.718 1.583 1.864 0.000
## 9 badMH home.parents3Other 1.103 0.924 1.318 0.277
## 10 badMH Ma_eduSecondary School 1.221 1.125 1.325 0.000
## 11 badMH Ma_eduPrimary School 1.509 1.280 1.779 0.000
## 12 badMH Ma_eduDont know/Doesnt apply 1.087 0.980 1.206 0.115
## 13 badMH covid.periodCOVID-years 1.388 1.294 1.488 0.000
## 14 selfharm.x5 (Intercept) 0.056 0.047 0.067 0.000
## 15 selfharm.x5 school.typeYouthReach 1.909 1.402 2.600 0.000
## 16 selfharm.x5 age.group16 1.000 0.912 1.098 0.994
## 17 selfharm.x5 age.group17 or older 1.180 1.014 1.373 0.032
## 18 selfharm.x5 gender3Female 2.976 2.702 3.277 0.000
## 19 selfharm.x5 gender3Other 12.155 9.983 14.799 0.000
## 20 selfharm.x5 urbanicity.z 1.097 0.984 1.223 0.095
## 21 selfharm.x5 home.parents3One Parent 1.829 1.659 2.016 0.000
## 22 selfharm.x5 home.parents3Other 1.467 1.202 1.791 0.000
## 23 selfharm.x5 Ma_eduSecondary School 1.074 0.971 1.189 0.167
## 24 selfharm.x5 Ma_eduPrimary School 1.536 1.269 1.859 0.000
## 25 selfharm.x5 Ma_eduDont know/Doesnt apply 0.929 0.813 1.061 0.276
## 26 selfharm.x5 covid.periodCOVID-years 0.947 0.868 1.033 0.217
## 27 suic.att (Intercept) 0.038 0.033 0.044 0.000
## 28 suic.att school.typeYouthReach 2.825 2.047 3.900 0.000
## 29 suic.att age.group16 1.097 0.985 1.222 0.092
## 30 suic.att age.group17 or older 1.412 1.195 1.668 0.000
## 31 suic.att gender3Female 2.040 1.834 2.270 0.000
## 32 suic.att gender3Other 5.747 4.617 7.154 0.000
## 33 suic.att urbanicity.z 1.103 1.039 1.170 0.001
## 34 suic.att home.parents3One Parent 1.938 1.738 2.161 0.000
## 35 suic.att home.parents3Other 1.883 1.523 2.329 0.000
## 36 suic.att Ma_eduSecondary School 1.274 1.137 1.428 0.000
## 37 suic.att Ma_eduPrimary School 1.666 1.354 2.050 0.000
## 38 suic.att Ma_eduDont know/Doesnt apply 1.116 0.966 1.290 0.135
## 39 suic.att covid.periodCOVID-years 1.169 1.060 1.289 0.002
First, look at changes post-COVID. Merge the two “COVID” timepoints together (2020, 2021) and the two post-COVID timepoints (2022, 2023) to simplify comparison and avoid regional bias.
dvlist <- c("badMH", "selfharm.x5", "suic.att")
trend <- lapply(dvlist, function(x) {
glmer(eval(substitute(i ~ survey.period
+ gender3 + age.group +
+ home.parents3 + Ma_edu
+ (1|munic.area/school.id),
list(i = as.name(x)))),
data = py[py$survey.year>2018,],
family=binomial() )} )
trends_ORs<- c()
trend.prob <- c()
trend.plots <- vector("list", length = 3)
for (DV in 1:3) {
time.coef <- as.data.frame(summary(trend[[DV]])[["coefficients"]])
time.coef <- time.coef[2,]
time.coef$CI.low <- time.coef$Estimate - (1.96*time.coef$`Std. Error`)
time.coef$CI.hi <- time.coef$Estimate + (1.96*time.coef$`Std. Error`)
time.coef <- move_columns(time.coef, c(CI.low, CI.hi), .after="Estimate")
time.coef[,1:3] <- exp(time.coef[,1:3])
names(time.coef) <- c("OR", "CI.low", "CI.hi", "SE_logodds", "z", "p")
time.coef <- round(time.coef, 3)
time.coef$DV <- dvlist[DV]
time.coef$fixef <- rownames(time.coef)
trends_ORs <- rbind(trends_ORs, time.coef)
pred.prob <- data.frame(effect("survey.period", trend[[DV]],
se=TRUE, confidence.level = .95, type="response"))
pred.prob$DV <- dvlist[DV]
trend.prob <- rbind(trend.prob, pred.prob)
upper=.30
trend.plots[[DV]]<-ggplot(pred.prob, aes(x=survey.period, y=fit,ymin=lower, ymax=upper))+
geom_col(alpha=0.5, fill="black")+
geom_errorbar(width=0.06, colour="black")+
geom_text(stat="identity", aes(label= paste0(round(fit*100, digits=0), "%")), size=4, hjust=-0.3, vjust=-0.7)+
scale_y_continuous(labels = scales::percent_format(accuracy = 5L), limits = c(0,upper),
name="")+
theme_classic()+
theme(axis.text.x = element_text(size=11),
axis.text.y = element_text(size=12),
axis.title.x=element_blank(),
plot.title = element_text(face="bold", hjust=0.5))
}
# plot predicted probability (& 95% cis of each outcome for 2020/1 and 2022/3)
ggarrange(trend.plots[[1]], trend.plots[[2]],trend.plots[[3]],
nrow=1, ncol=3)
For next analysis, limit dataset to the West (has before, during and after COVID timepoints). This includes 15,085 individuals from 77 schools in the West of Ireland (including an urban centre, Galway city [16% of participants]).
pyw <- py[py$region=="West",]
# check demographics across years in the west
table1(~gender3 + age.group + ethnicity3 + grade + munic.area + home.parents3 + home.language + Ma_edu | survey.year, data=pyw, overall=F)
## Warning in table1.formula(~gender3 + age.group + ethnicity3 + grade +
## munic.area + : Terms to the right of '|' in formula 'x' define table columns
## and are expected to be factors with meaningful labels.
| 2018 (N=4722) |
2020 (N=4828) |
2022 (N=5122) |
|
|---|---|---|---|
| gender3 | |||
| Male | 2337 (49.5%) | 2360 (48.9%) | 2505 (48.9%) |
| Female | 2358 (49.9%) | 2374 (49.2%) | 2469 (48.2%) |
| Other | 0 (0%) | 77 (1.6%) | 139 (2.7%) |
| Missing | 27 (0.6%) | 17 (0.4%) | 9 (0.2%) |
| age.group | |||
| 16 | 3025 (64.1%) | 3094 (64.1%) | 3109 (60.7%) |
| 15 | 1439 (30.5%) | 1341 (27.8%) | 1452 (28.3%) |
| 17-19 | 240 (5.1%) | 378 (7.8%) | 560 (10.9%) |
| Missing | 18 (0.4%) | 15 (0.3%) | 1 (0.0%) |
| ethnicity3 | |||
| White Irish | 0 (0%) | 4077 (84.4%) | 3976 (77.6%) |
| White non-Irish | 0 (0%) | 408 (8.5%) | 525 (10.3%) |
| Black, Asian or other minority ethnicity | 0 (0%) | 313 (6.5%) | 613 (12.0%) |
| Not Asked | 4722 (100%) | 0 (0%) | 0 (0%) |
| Missing | 0 (0%) | 30 (0.6%) | 8 (0.2%) |
| grade | |||
| 4th Year | 3288 (69.6%) | 3546 (73.4%) | 3743 (73.1%) |
| 5th Year | 1382 (29.3%) | 1176 (24.4%) | 1253 (24.5%) |
| LCA/QQI | 0 (0%) | 83 (1.7%) | 57 (1.1%) |
| 6th Year | 10 (0.2%) | 0 (0%) | 61 (1.2%) |
| Missing | 42 (0.9%) | 23 (0.5%) | 8 (0.2%) |
| munic.area | |||
| North Dublin | 0 (0%) | 0 (0%) | 0 (0%) |
| Cavan | 0 (0%) | 0 (0%) | 0 (0%) |
| Monaghan | 0 (0%) | 0 (0%) | 0 (0%) |
| Mayo | 1463 (31.0%) | 1479 (30.6%) | 1478 (28.9%) |
| Roscommon | 511 (10.8%) | 629 (13.0%) | 672 (13.1%) |
| Galway County | 1975 (41.8%) | 1966 (40.7%) | 2097 (40.9%) |
| Galway City | 773 (16.4%) | 754 (15.6%) | 875 (17.1%) |
| home.parents3 | |||
| Two Parents | 3749 (79.4%) | 3845 (79.6%) | 3973 (77.6%) |
| One Parent | 871 (18.4%) | 834 (17.3%) | 825 (16.1%) |
| Other | 84 (1.8%) | 126 (2.6%) | 312 (6.1%) |
| Missing | 18 (0.4%) | 23 (0.5%) | 12 (0.2%) |
| home.language | |||
| English only | 3894 (82.5%) | 3874 (80.2%) | 3669 (71.6%) |
| English and another | 621 (13.2%) | 725 (15.0%) | 978 (19.1%) |
| Non-English language only | 193 (4.1%) | 209 (4.3%) | 462 (9.0%) |
| Missing | 14 (0.3%) | 20 (0.4%) | 13 (0.3%) |
| Ma_edu | |||
| College or University | 2415 (51.1%) | 2482 (51.4%) | 3380 (66.0%) |
| Secondary School | 1188 (25.2%) | 1080 (22.4%) | 1233 (24.1%) |
| Primary School | 337 (7.1%) | 261 (5.4%) | 94 (1.8%) |
| Dont know/Doesnt apply | 751 (15.9%) | 949 (19.7%) | 402 (7.8%) |
| Missing | 31 (0.7%) | 56 (1.2%) | 13 (0.3%) |
# run multivariate models
pyw$year <- as.factor(pyw$survey.year)
covid.eff <- c()
# model1: bad mental health
covid_mlm1 <- glmer(badMH.f ~ year
+ gender3 + age.group +
+ home.parents3 + Ma_edu
+ (1|munic.area/school.id),
data=pyw, family=binomial())
summary(covid_mlm1)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: badMH.f ~ year + gender3 + age.group + +home.parents3 + Ma_edu +
## (1 | munic.area/school.id)
## Data: pyw
##
## AIC BIC logLik -2*log(L) df.resid
## 12659.4 12765.4 -6315.7 12631.4 14273
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.4835 -0.4875 -0.3906 -0.3092 3.7031
##
## Random effects:
## Groups Name Variance Std.Dev.
## school.id:munic.area (Intercept) 0.020575 0.14344
## munic.area (Intercept) 0.007042 0.08392
## Number of obs: 14287, groups: school.id:munic.area, 75; munic.area, 4
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.26821 0.07689 -29.500 < 2e-16 ***
## year2020 0.27279 0.05513 4.948 7.48e-07 ***
## year2022 -0.11750 0.05919 -1.985 0.04712 *
## gender3Female 0.74823 0.04980 15.023 < 2e-16 ***
## gender3Other 2.07569 0.14881 13.948 < 2e-16 ***
## age.group15 -0.01350 0.05116 -0.264 0.79180
## age.group17-19 0.17459 0.08364 2.087 0.03684 *
## home.parents3One Parent 0.55617 0.05493 10.124 < 2e-16 ***
## home.parents3Other 0.11840 0.12382 0.956 0.33897
## Ma_eduSecondary School 0.17663 0.05473 3.228 0.00125 **
## Ma_eduPrimary School 0.32171 0.10079 3.192 0.00141 **
## Ma_eduDont know/Doesnt apply 0.06144 0.06914 0.889 0.37415
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) yr2020 yr2022 gndr3F gndr3O ag.g15 a.17-1 hm.3OP hm.p3O
## year2020 -0.402
## year2022 -0.406 0.518
## gender3Feml -0.402 0.004 0.007
## gender3Othr -0.080 -0.059 -0.125 0.202
## age.group15 -0.206 0.017 0.012 -0.048 0.002
## ag.grp17-19 -0.090 -0.031 -0.082 0.020 -0.003 0.193
## hm.prnts3OP -0.159 0.025 0.009 -0.001 0.005 -0.015 -0.035
## hm.prnts3Ot -0.014 -0.015 -0.093 -0.038 -0.020 -0.084 -0.051 0.108
## M_dScndrySc -0.249 0.026 0.050 0.002 0.002 0.027 -0.027 -0.031 0.033
## M_dPrmrySch -0.159 0.041 0.134 -0.003 -0.019 0.001 -0.052 -0.056 -0.010
## M_dDnknw/Da -0.234 -0.037 0.117 0.105 -0.009 0.005 -0.034 -0.075 -0.034
## M_dScS M_dPrS
## year2020
## year2022
## gender3Feml
## gender3Othr
## age.group15
## ag.grp17-19
## hm.prnts3OP
## hm.prnts3Ot
## M_dScndrySc
## M_dPrmrySch 0.188
## M_dDnknw/Da 0.265 0.166
adj.cov <- summary(covid_mlm1)[["coefficients"]]
adj.cov <- adj.cov[rownames(adj.cov)=="year2020" | rownames(adj.cov)=="year2022",]
rownames(adj.cov) <- paste0("badMH_", rownames(adj.cov))
covid.eff <- rbind(covid.eff, adj.cov)
# model3: frequent self-harm
covid_mlm2 <- glmer(selfharm.x5 ~ year
+ gender3 + age.group +
+ home.parents4 + Ma_edu
+ (1|munic.area/school.id),
data=pyw, family=binomial())
summary(covid_mlm2)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: selfharm.x5 ~ year + gender3 + age.group + +home.parents4 + Ma_edu +
## (1 | munic.area/school.id)
## Data: pyw
##
## AIC BIC logLik -2*log(L) df.resid
## 9093.0 9206.3 -4531.5 9063.0 14099
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.6662 -0.3820 -0.2853 -0.2195 5.4269
##
## Random effects:
## Groups Name Variance Std.Dev.
## school.id:munic.area (Intercept) 0.055250 0.23505
## munic.area (Intercept) 0.003843 0.06199
## Number of obs: 14114, groups: school.id:munic.area, 75; munic.area, 4
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -3.13974 0.09174 -34.223 < 2e-16 ***
## year2020 0.15718 0.07106 2.212 0.02696 *
## year2022 0.23583 0.07196 3.277 0.00105 **
## gender3Female 1.04103 0.06555 15.882 < 2e-16 ***
## gender3Other 2.65872 0.15514 17.137 < 2e-16 ***
## age.group15 -0.05246 0.06343 -0.827 0.40818
## age.group17-19 0.19923 0.10055 1.981 0.04754 *
## home.parents41 Parent 0.58047 0.07438 7.804 5.99e-15 ***
## home.parents41 Parent +other 0.88284 0.11044 7.994 1.31e-15 ***
## home.parents4Other 0.38367 0.13697 2.801 0.00509 **
## Ma_eduSecondary School 0.01913 0.06875 0.278 0.78081
## Ma_eduPrimary School 0.48236 0.11796 4.089 4.33e-05 ***
## Ma_eduDont know/Doesnt apply 0.06444 0.08652 0.745 0.45642
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation matrix not shown by default, as p = 13 > 12.
## Use print(x, correlation=TRUE) or
## vcov(x) if you need it
adj.cov <- summary(covid_mlm2)[["coefficients"]]
adj.cov <- adj.cov[rownames(adj.cov)=="year2020" | rownames(adj.cov)=="year2022",]
rownames(adj.cov) <- paste0("selfharmX5_", rownames(adj.cov))
covid.eff <- rbind(covid.eff, adj.cov)
# model4: suicide attempt
covid_mlm3 <- glmer(suic.att ~ year
+ gender3 + age.group +
+ home.parents4 + Ma_edu
+ (1|munic.area/school.id),
data=pyw, family=binomial())
summary(covid_mlm3)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: suic.att ~ year + gender3 + age.group + +home.parents4 + Ma_edu +
## (1 | munic.area/school.id)
## Data: pyw
##
## AIC BIC logLik -2*log(L) df.resid
## 7708.3 7821.6 -3839.2 7678.3 14057
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.1402 -0.3164 -0.2687 -0.2111 5.9121
##
## Random effects:
## Groups Name Variance Std.Dev.
## school.id:munic.area (Intercept) 0.055148 0.23484
## munic.area (Intercept) 0.002273 0.04767
## Number of obs: 14072, groups: school.id:munic.area, 75; munic.area, 4
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -3.17296 0.09774 -32.464 < 2e-16 ***
## year2020 0.13833 0.07767 1.781 0.074923 .
## year2022 0.02825 0.08104 0.349 0.727385
## gender3Female 0.68287 0.07000 9.756 < 2e-16 ***
## gender3Other 1.71715 0.17671 9.717 < 2e-16 ***
## age.group15 -0.10420 0.07201 -1.447 0.147908
## age.group17-19 0.35216 0.10609 3.319 0.000902 ***
## home.parents41 Parent 0.64218 0.08140 7.890 3.03e-15 ***
## home.parents41 Parent +other 0.83300 0.12183 6.837 8.08e-12 ***
## home.parents4Other 0.66865 0.14295 4.678 2.90e-06 ***
## Ma_eduSecondary School 0.15098 0.07582 1.991 0.046431 *
## Ma_eduPrimary School 0.48145 0.12890 3.735 0.000188 ***
## Ma_eduDont know/Doesnt apply 0.12006 0.09461 1.269 0.204433
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation matrix not shown by default, as p = 13 > 12.
## Use print(x, correlation=TRUE) or
## vcov(x) if you need it
adj.cov <- summary(covid_mlm3)[["coefficients"]]
adj.cov <- adj.cov[rownames(adj.cov)=="year2020" | rownames(adj.cov)=="year2022",]
rownames(adj.cov) <- paste0("suicatt_", rownames(adj.cov))
covid.eff <- rbind(covid.eff, adj.cov)
covid.eff <- as.data.frame(covid.eff)
covid.eff$OR <- exp(covid.eff$Estimate)
covid.eff$CI.low<-exp(covid.eff$Estimate - (1.96*covid.eff$`Std. Error`))
covid.eff$CI.hi<-exp(covid.eff$Estimate + (1.96*covid.eff$`Std. Error`))
covid.eff <- round(covid.eff, 3)
# plot model-derived probability of each outcome before, during & after COVID (West data only)
predict1 = as.data.frame(effect("year", covid_mlm1,
se=TRUE, confidence.level = .95
# typical="response", type="response" # these don't make a difference here it seems
))
predict1$outcome <- "badMH"
predict2 = as.data.frame(effect("year", covid_mlm2,
se=TRUE, confidence.level = .95
))
predict2$outcome <- "self-harmX5"
predict3 = as.data.frame(effect("year", covid_mlm3,
se=TRUE, confidence.level = .95
))
predict3$outcome <- "suic.att"
predict <- rbind(predict1, predict2, predict3)
predict$year <- as.numeric(as.character(predict$year))
ggplot(data=predict, aes(x=year, y=fit, group=outcome, colour=outcome))+
geom_smooth(method="loess", se=T)+
geom_errorbar(aes(ymin=lower, ymax=upper), width=0.1)+
labs(title="% of teens reporting various mental health outcomes",
x="Year of survey")+
scale_y_continuous(labels = scales::percent_format(accuracy = 5L), limits = c(0,.35),
name="Predicted Probability")+
theme_bw()+
theme(panel.grid.minor.x=element_blank())