MPH Modul - V108.10.26 Applied Biostatistics: Regression Models and Confounding

Modularbeit

Authors

Mirco Cassina

Anna Falkowski

Alexander Schurz

Andreas Strässle

Published

February 6, 2026

Code
library(pacman)
pacman::p_load(
  rio, # data import
  dplyr, # data preparation
  tidyverse, # data preparation
  gtsummary, # table 1
  parameters, # Convert model output
  flextable, # output files
  officer, # output files
  ggeffects) # model presentation

1 Study objective

  • What is the association between physical fights in the previous 12 months and gaming in students in grade 9 to 12 in the US?
  • There is an association between physical fights in the previous 12 months and gaming.

2 Methods

Variables Levels Variable used in model Reference level
fight_cat

0 = 0 times

1 = 1 time

2 = 2 or 3 times

3 = 4 or 5 times

4 = 6 or 7 times

5 = 8 or 9 times

6 = 10 or 11 times

7 = 12 or more times

fight:

1 = No (0 times)

2 = Yes (levels 1-7)

1 = No (0 times)
gaming_cat

1 = not at all

2 = <1 hour per day

3 = 1 hours per day

4 = 2 hours per day

5 = 3 hours per day

6 = 4 hours per day

7 = 5 or more hours per day

gaming:

1 = <1 hour per day (levels 1,2)

2 = ≥1 hour per day (levels 3-7)

1 = <1 hour per day
Code
df.yrbs.raw <- rio::import("../data/yrbs.csv", header = TRUE)
Code
df.yrbs.ad <- df.yrbs.raw %>%
  mutate(
    age = as.numeric(age),
    sex = factor(sex, levels = c(1,2),
                 labels = c("Male", "Female")),
    sex = fct_explicit_na(sex, na_level = "Missing"),
    race_cat = factor(race4, levels = c(1:4),
                      labels = c("White", "Hispanic incl. multiple Hisp.",
                                 "Black or African American", "Other")),
    height = as.numeric(height),
    weight = as.numeric(weight),
    bmi = round(weight / height^2, 2),
    grade = as.factor(grade),
    grade = forcats::fct_recode(grade, other = "88"),
    e_bullied = factor(e_bullied, levels = c(0,1),
                       labels = c("no", "yes")),
    fight_cat = factor(fight, levels = c(0:7),
                       labels = c("0 times", "1 time", "2-3 times",
                                  "4-5 times", "6-7 times",
                                  "8-9 times", "10-11 times",
                                  "≥12 times")),
    gaming_cat = factor(gaming, levels = c(1:7),
                        labels = c("Not at all", "<1 hours per day",
                        "1 hour per day", "2 hours per day",
                        "3 hours per day", "4 hours per day",
                        "≥5 hours per day")),
    unsafe_school = factor(unsafe_school, levels = c(0:4),
                           labels = c("0 days", "1 day", "2 or 3 days",
                                      "4 or 5 days", "≥6 days")),
    active = as.numeric(active)) %>% 
  select(age, sex, race_cat, height, weight, bmi, grade, e_bullied, 
         fight_cat, gaming_cat, unsafe_school, active)
Code
visdat::vis_miss(df.yrbs.ad, warn_large_data = FALSE)

3 Results

3.1 Participant characteristics table

Code
tbl.demo <- df.yrbs.ad %>%
  select(
    age, sex, race_cat,
    height, weight, bmi, 
    grade, e_bullied, 
    fight_cat, gaming_cat, unsafe_school, active) %>%
  gtsummary::tbl_summary(
    by = sex,
    type = list(age ~ "continuous", active ~ "continuous"),
    percent = "row",
    statistic = list(
      all_continuous() ~ "{mean} ({sd})",
      all_categorical() ~ "{n} ({p}%)"
    ),
    label = list(
      age = "Age (years)",
      sex = "Sex",
      race_cat = "Race",
      height = "Height (meter)",
      weight = "Weight (kilogram)",
      bmi = "Body mass index",
      grade = "Grade at school",
      e_bullied = "Electronical bullying in last 12 months",
      fight_cat = "Physical fight in last 12 months",
      gaming_cat = "Computer use during school days (e.g., gaming)",
      unsafe_school = "Days of no school during last 30 days",
      active = "Physically active past 7 days (≥ 60min)"),
    missing_text = "Missing",
    missing_stat = "{N_miss} ({p_miss}%)",
    missing = "ifany",
    digits = all_continuous() ~ 2
  ) %>%
  add_overall(last = TRUE) %>% 
  bold_labels()


tbl.demo %>%
  as_flex_table() %>%
  autofit() %>%
  bold(part = "header") %>%
  align(j = 1, align = "left", part = "all")

Participant characteristics

Characteristic

Male
N = 6,6411

Female
N = 6,8851

Missing
N = 1511

Overall
N = 13,6771

Age (years)

16.00 (1.24)

15.89 (1.23)

14.95 (1.76)

15.94 (1.24)

Missing

9 (0.1%)

6 (<0.1%)

57 (38%)

72 (0.5%)

Race

White

3,244 (49%)

3,398 (51%)

26 (0.4%)

6,668 (100%)

Hispanic incl. multiple Hisp.

1,466 (48%)

1,563 (51%)

9 (0.3%)

3,038 (100%)

Black or African American

1,010 (50%)

1,024 (50%)

6 (0.3%)

2,040 (100%)

Other

722 (48%)

756 (51%)

15 (1.0%)

1,493 (100%)

Missing

199 (3.0%)

144 (2.1%)

95 (63%)

438 (3.2%)

Height (meter)

1.76 (0.08)

1.63 (0.07)

NA (NA)

1.69 (0.10)

Missing

677 (10%)

709 (10%)

151 (100%)

1,537 (11%)

Weight (kilogram)

73.64 (18.49)

62.38 (15.13)

NA (NA)

67.91 (17.78)

Missing

677 (10%)

709 (10%)

151 (100%)

1,537 (11%)

Body mass index

23.73 (5.41)

23.50 (5.33)

NA (NA)

23.61 (5.37)

Missing

677 (10%)

709 (10%)

151 (100%)

1,537 (11%)

Grade at school

9

1,716 (47%)

1,902 (52%)

19 (0.5%)

3,637 (100%)

10

1,837 (49%)

1,853 (50%)

27 (0.7%)

3,717 (100%)

11

1,651 (50%)

1,657 (50%)

14 (0.4%)

3,322 (100%)

12

1,391 (49%)

1,440 (51%)

19 (0.7%)

2,850 (100%)

other

18 (46%)

14 (36%)

7 (18%)

39 (100%)

Missing

28 (0.4%)

19 (0.3%)

65 (43%)

112 (0.8%)

Electronical bullying in last 12 months

694 (32%)

1,400 (65%)

44 (2.1%)

2,138 (100%)

Missing

96 (1.4%)

66 (1.0%)

30 (20%)

192 (1.4%)

Physical fight in last 12 months

0 times

3,738 (45%)

4,591 (55%)

58 (0.7%)

8,387 (100%)

1 time

628 (58%)

447 (41%)

9 (0.8%)

1,084 (100%)

2-3 times

491 (63%)

274 (35%)

12 (1.5%)

777 (100%)

4-5 times

144 (63%)

79 (35%)

4 (1.8%)

227 (100%)

6-7 times

64 (74%)

22 (25%)

1 (1.1%)

87 (100%)

8-9 times

27 (73%)

8 (22%)

2 (5.4%)

37 (100%)

10-11 times

15 (75%)

5 (25%)

0 (0%)

20 (100%)

≥12 times

120 (79%)

28 (18%)

4 (2.6%)

152 (100%)

Missing

1,414 (21%)

1,431 (21%)

61 (40%)

2,906 (21%)

Computer use during school days (e.g., gaming)

Not at all

842 (35%)

1,558 (64%)

26 (1.1%)

2,426 (100%)

<1 hours per day

755 (52%)

663 (46%)

21 (1.5%)

1,439 (100%)

1 hour per day

744 (55%)

599 (44%)

14 (1.0%)

1,357 (100%)

2 hours per day

1,060 (52%)

950 (47%)

14 (0.7%)

2,024 (100%)

3 hours per day

1,032 (51%)

964 (48%)

23 (1.1%)

2,019 (100%)

4 hours per day

652 (51%)

624 (48%)

11 (0.9%)

1,287 (100%)

≥5 hours per day

1,253 (48%)

1,345 (51%)

27 (1.0%)

2,625 (100%)

Missing

303 (4.6%)

182 (2.6%)

15 (9.9%)

500 (3.7%)

Days of no school during last 30 days

0 days

6,076 (49%)

6,164 (50%)

91 (0.7%)

12,331 (100%)

1 day

241 (39%)

365 (59%)

9 (1.5%)

615 (100%)

2 or 3 days

153 (38%)

233 (58%)

16 (4.0%)

402 (100%)

4 or 5 days

50 (49%)

49 (48%)

3 (2.9%)

102 (100%)

≥6 days

86 (57%)

54 (36%)

11 (7.3%)

151 (100%)

Missing

35 (0.5%)

20 (0.3%)

21 (14%)

76 (0.6%)

Physically active past 7 days (≥ 60min)

4.12 (2.54)

3.28 (2.43)

3.47 (2.60)

3.69 (2.52)

Missing

270 (4.1%)

175 (2.5%)

12 (7.9%)

457 (3.3%)

1Mean (SD); n (%)

Code
tbl.demo.docx <- tbl.demo
tbl.demo.docx %>%
  as_flex_table() %>%
  set_caption("Table. Participant characteristics, separated by sex.") %>%
  flextable::set_table_properties(width = 1, layout = "autofit") %>%
  flextable::fontsize(size = 8, part = "all") %>%
  flextable::font(fontname = "Times New Roman", part = "all") %>%
  flextable::save_as_docx(path = "../tables/table1.docx")

3.2 Model development

Code
df.yrbs.ad.c <- df.yrbs.ad %>% 
  mutate(
    fight = if_else(fight_cat == "0 times", 0L, 1L),
    fight = factor(fight, levels = c(0, 1), labels = c("No", "Yes")),
    gaming = if_else(
      gaming_cat %in% c("Not at all", "<1 hours per day"),0L, 1L),
    gaming = factor(gaming, levels = c(0, 1),
      labels = c("< 1 hour per day", "≥ 1 hour per day"))) %>% 
  select(age, sex, race_cat, fight, gaming, unsafe_school, active) %>% 
  filter(!is.na(sex) & sex != "Missing") %>%  # drop missing data and those "Missing" for sex
  droplevels() %>% 
  drop_na()
Code
DataExplorer::plot_correlation(df.yrbs.ad)

  • Confounder: none

\[ \begin{aligned} \log\left(\frac{P(fight_i = 1 \mid X_i)}{P(fight_i = 0 \mid X_i)}\right) = \beta_0 + \beta_1 \cdot \mathrm{Gaming}_{i, \ge 1 \text{ hour/day}} \end{aligned} \]

Reference levels

  • Gaming: < 1 hour/day (reference)
Code
# Model 1 
  glm.model1 <- glm(
    fight ~ gaming,
    data = df.yrbs.ad.c,
    family = binomial(link = "logit"))
  
  # Extract adjusted ORs
  glm.model1.ad <- parameters::model_parameters(
    glm.model1,
    exponentiate = TRUE,
    drop = "Intercept",
    digits = 2)
  
  # Adapt output
  glm.model1.final <- glm.model1.ad %>%
    mutate(
      `Odds Ratio` = round(Coefficient, 2),
      `95% CI` = paste0(round(CI_low, 2), " – ", round(CI_high, 2)),
      Parameter = case_when(
        Parameter == "gaming≥ 1 hour per day" ~ "Gaming: ≥1 hour per day",
        Parameter == "age" ~ "Age",
        Parameter == "sexFemale" ~ "Sex: Female",
        Parameter == "race_catHispanic incl. multiple Hisp." ~ "Race: Hispanic incl. multiple Hisp.",
        Parameter == "race_catBlack or African American" ~ "Race: Black or African American",
        Parameter == "race_catOther" ~ "Race: Other",
        Parameter == "unsafe_school1 day" ~ "Unsafe school: 1 day",
        Parameter == "unsafe_school2 or 3 days" ~ "Unsafe school: 2 or 3 days",
        Parameter == "unsafe_school4 or 5 days" ~ "Unsafe school: 4 or 5 days",
        Parameter == "unsafe_school≥6 days" ~ "Unsafe school: ≥ 6 days",
        Parameter == "active" ~ "Physically active (≥ 60min/d in previous 7 days)",
        TRUE ~ Parameter
      )
    ) %>%
    select(Parameter, `Odds Ratio`, `95% CI`)
    
ft.model1.final <- flextable(glm.model1.final)
ft.model1.final <- ft.model1.final %>%
  set_caption("Table. Model 1 (unadjusted) on association of fighting and gaming") %>%
  autofit() %>%
  bold(part = "header") %>%
  align(align = "center", part = "all")
ft.model1.final

Parameter

Odds Ratio

95% CI

Gaming: ≥1 hour per day

1.04

0.94 – 1.16

Code
ft.model1.final %>%
  set_caption("Model 1") %>%
  autofit() %>%
  bold(part = "header") %>%
  align(align = "center", part = "all") %>%
  flextable::line_spacing(space = 1.0) %>%
  flextable::fontsize(size = 8, part = "all") %>%
  flextable::font(fontname = "Times New Roman", part = "all") %>%
  flextable::save_as_docx(path = "../tables/ft.model1.docx")
  • Confounder: age, sex, race_cat

\[ \begin{aligned} \log\left(\frac{P(fight_i = 1 \mid X_i)}{P(fight_i = 0 \mid X_i)}\right) = \beta_0 + \beta_1 \cdot \mathrm{Gaming}_{i, \ge 1 \text{ hour/day}} + \beta_2 \cdot \mathrm{Age}_i + \beta_3 \cdot \mathrm{Sex}_{i, \mathrm{Female}} + \\ + \beta_4 \cdot \mathrm{Race}_{i, \mathrm{Hispanic \ incl. \ multiple \ Hisp.}} + \beta_5 \cdot \mathrm{Race}_{i, \mathrm{Black \ or \ African \ American}} + \beta_6 \cdot \mathrm{Race}_{i, \mathrm{Other}} \end{aligned} \]

Reference levels

  • Gaming: < 1 hour/day (reference)

  • Sex: Male (reference)

  • Race: White, non-Hispanic

Code
# Model 2
  glm.model2 <- glm(
    fight ~ gaming + age + sex + race_cat,
    data = df.yrbs.ad.c,
    family = binomial(link = "logit"))
  
  # Extract adjusted ORs
  glm.model2.ad <- parameters::model_parameters(
    glm.model2,
    exponentiate = TRUE,
    drop = "Intercept",
    digits = 2)
  
  # Adapt output
  glm.model2.final <- glm.model2.ad %>%
    mutate(
      `Odds Ratio` = round(Coefficient, 2),
      `95% CI` = paste0(round(CI_low, 2), " – ", round(CI_high, 2)),
      Parameter = case_when(
        Parameter == "gaming≥ 1 hour per day" ~ "Gaming: ≥1 hour per day",
        Parameter == "age" ~ "Age",
        Parameter == "sexFemale" ~ "Sex: Female",
        Parameter == "race_catHispanic incl. multiple Hisp." ~ "Race: Hispanic incl. multiple Hisp.",
        Parameter == "race_catBlack or African American" ~ "Race: Black or African American",
        Parameter == "race_catOther" ~ "Race: Other",
        Parameter == "unsafe_school1 day" ~ "Unsafe school: 1 day",
        Parameter == "unsafe_school2 or 3 days" ~ "Unsafe school: 2 or 3 days",
        Parameter == "unsafe_school4 or 5 days" ~ "Unsafe school: 4 or 5 days",
        Parameter == "unsafe_school≥6 days" ~ "Unsafe school: ≥ 6 days",
        Parameter == "active" ~ "Physically active (≥ 60min/d in previous 7 days)",
        TRUE ~ Parameter
      )
    ) %>%
    select(Parameter, `Odds Ratio`, `95% CI`)
    
ft.model2.final <- flextable(glm.model2.final)
ft.model2.final <- ft.model2.final %>%
  set_caption("Table. Model 2 on association of fighting and gaming") %>%
  autofit() %>%
  bold(part = "header") %>%
  align(align = "center", part = "all")
ft.model2.final

Parameter

Odds Ratio

95% CI

Gaming: ≥1 hour per day

0.95

0.86 – 1.06

Age

0.86

0.82 – 0.89

Sex: Female

0.45

0.41 – 0.5

Race: Hispanic incl. multiple Hisp.

1.23

1.09 – 1.38

Race: Black or African American

1.85

1.61 – 2.12

Race: Other

1.27

1.09 – 1.49

Code
ft.model2.final %>%
  set_caption("Model 2") %>%
  autofit() %>%
  bold(part = "header") %>%
  align(align = "center", part = "all") %>%
  flextable::line_spacing(space = 1.0) %>%
  flextable::fontsize(size = 8, part = "all") %>%
  flextable::font(fontname = "Times New Roman", part = "all") %>%
  flextable::save_as_docx(path = "../tables/ft.model2.docx")
  • Confounder: age, sex, race_cat, active, unsafe_school

\[ \begin{aligned} \log\left(\frac{P(fight_i = 1 \mid X_i)}{P(fight_i = 0 \mid X_i)}\right) &= \beta_0 + \beta_1 \cdot \mathrm{Gaming}_{i, \ge 1 \text{ hour/day}} + \beta_2 \cdot \mathrm{Age}_i + \beta_3 \cdot \mathrm{Sex}_{i, \mathrm{Female}} \\ &\quad + \beta_4 \cdot \mathrm{Race}_{i, \mathrm{Hispanic \ incl. \ multiple \ Hisp.}} + \beta_5 \cdot \mathrm{Race}_{i, \mathrm{Black \ or \ African \ American}} + \beta_6 \cdot \mathrm{Race}_{i, \mathrm{Other}} + \\ &\quad + \beta_7 \cdot \mathrm{UnsafeSchool}_{i, \mathrm{1\ day}} + \beta_8 \cdot \mathrm{UnsafeSchool}_{i, \mathrm{2\ or\ 3\ days}} + \beta_9 \cdot \mathrm{UnsafeSchool}_{i, \mathrm{4\ or\ 5\ days}} + \\ &\quad + \beta_{10} \cdot \mathrm{UnsafeSchool}_{i, \mathrm{\ge 6\ days}} + \beta_{11} \cdot \mathrm{Active}_i \end{aligned} \]

Reference levels

  • Gaming: < 1 hour/day (reference)

  • Sex: Male (reference)

  • Race: White, non-Hispanic

  • UnsafeSchool: 0 days

  • Active: Not physically active

Code
# Model 3
glm.model3 <- glm(
  fight ~ gaming + age + sex + race_cat + unsafe_school + active,
  data = df.yrbs.ad.c,
  family = binomial)

# Extract adjusted ORs
glm.model3.ad <- parameters::model_parameters(
  glm.model3,
  exponentiate = TRUE,
  drop = "Intercept",
  digits = 2)

# Adapt output
glm.model3.final <- glm.model3.ad %>%
  mutate(
    `Odds Ratio` = round(Coefficient, 2),
    `95% CI` = paste0(round(CI_low, 2), " – ", round(CI_high, 2)),
    Parameter = case_when(
      Parameter == "gaming≥ 1 hour per day" ~ "Gaming: ≥1 hour per day",
      Parameter == "age" ~ "Age",
      Parameter == "sexFemale" ~ "Sex: Female",
      Parameter == "race_catHispanic incl. multiple Hisp." ~ "Race: Hispanic incl. multiple Hisp.",
      Parameter == "race_catBlack or African American" ~ "Race: Black or African American",
      Parameter == "race_catOther" ~ "Race: Other",
      Parameter == "unsafe_school1 day" ~ "Unsafe school: 1 day",
      Parameter == "unsafe_school2 or 3 days" ~ "Unsafe school: 2 or 3 days",
      Parameter == "unsafe_school4 or 5 days" ~ "Unsafe school: 4 or 5 days",
      Parameter == "unsafe_school≥6 days" ~ "Unsafe school: ≥ 6 days",
      Parameter == "active" ~ "Physically active (≥ 60min/d in previous 7 days)",
      TRUE ~ Parameter
    )
  ) %>%
  select(Parameter, `Odds Ratio`, `95% CI`)
  
ft.model3.final <- flextable(glm.model3.final)
ft.model3.final <- ft.model3.final %>%
  set_caption("Table. Model 3 on association of fighting and gaming") %>%
  autofit() %>%
  bold(part = "header") %>%
  align(align = "center", part = "all")
ft.model3.final

Parameter

Odds Ratio

95% CI

Gaming: ≥1 hour per day

0.97

0.87 – 1.09

Age

0.86

0.83 – 0.9

Sex: Female

0.45

0.41 – 0.5

Race: Hispanic incl. multiple Hisp.

1.18

1.05 – 1.34

Race: Black or African American

1.83

1.59 – 2.11

Race: Other

1.25

1.06 – 1.46

Unsafe school: 1 day

2.15

1.74 – 2.65

Unsafe school: 2 or 3 days

2.91

2.25 – 3.75

Unsafe school: 4 or 5 days

3.61

2.12 – 6.1

Unsafe school: ≥ 6 days

5.67

3.67 – 8.86

Physically active (≥ 60min/d in previous 7 days)

1.04

1.02 – 1.06

Code
ft.model3.final %>%
  set_caption("Model 3") %>%
  autofit() %>%
  bold(part = "header") %>%
  align(align = "center", part = "all") %>%
  flextable::line_spacing(space = 1.0) %>%
  flextable::fontsize(size = 8, part = "all") %>%
  flextable::font(fontname = "Times New Roman", part = "all") %>%
  flextable::save_as_docx(path = "../tables/ft.model3.docx")

Reference levels

  • Gaming: < 1 hour/day (reference)

  • Age: >14 years (reference)

  • Sex: Male (reference)

  • Race: White, non-Hispanic

  • UnsafeSchool: 0 days

  • Active: Not physically active

Code
# Model 4
df.yrbs.ad.c.2 <- df.yrbs.ad.c %>%
  mutate(age_cat = factor(
    ifelse(age <= 14, "≤14", ">14"),
    levels = c("≤14", ">14")))

df.yrbs.ad.c.2$age_cat <- relevel(df.yrbs.ad.c.2$age_cat, ref = ">14")

glm.model4 <- glm(
  fight ~ gaming + age_cat + sex + race_cat + unsafe_school + active,
  data = df.yrbs.ad.c.2,
  family = binomial)

# Extract adjusted ORs
glm.model4.ad <- parameters::model_parameters(
  glm.model4,
  exponentiate = TRUE,
  drop = "Intercept",
  digits = 2)

# Adapt output
glm.model4.final <- glm.model4.ad %>%
  mutate(
    `Odds Ratio` = round(Coefficient, 2),
    `95% CI` = paste0(round(CI_low, 2), " – ", round(CI_high, 2)),
    Parameter = case_when(
      Parameter == "gaming≥ 1 hour per day" ~ "Gaming: ≥1 hour per day",
      Parameter == "age_cat≤14" ~ "Age: ≤14 years",
      Parameter == "sexFemale" ~ "Sex: Female",
      Parameter == "race_catHispanic incl. multiple Hisp." ~ "Race: Hispanic incl. multiple Hisp.",
      Parameter == "race_catBlack or African American" ~ "Race: Black or African American",
      Parameter == "race_catOther" ~ "Race: Other",
      Parameter == "unsafe_school1 day" ~ "Unsafe school: 1 day",
      Parameter == "unsafe_school2 or 3 days" ~ "Unsafe school: 2 or 3 days",
      Parameter == "unsafe_school4 or 5 days" ~ "Unsafe school: 4 or 5 days",
      Parameter == "unsafe_school≥6 days" ~ "Unsafe school: ≥ 6 days",
      Parameter == "active" ~ "Physically active (≥ 60min/d in previous 7 days)",
      TRUE ~ Parameter
    )
  ) %>%
  select(Parameter, `Odds Ratio`, `95% CI`)
  
ft.model4.final <- flextable(glm.model4.final)
ft.model4.final <- ft.model4.final %>%
  set_caption("Table. Model 4 on association of fighting and gaming, age: categorical (≤14y/>14y)") %>%
  autofit() %>%
  bold(part = "header") %>%
  align(align = "center", part = "all")
ft.model4.final

Parameter

Odds Ratio

95% CI

Gaming: ≥1 hour per day

0.99

0.89 – 1.11

Age: ≤14 years

1.22

1.05 – 1.41

Sex: Female

0.46

0.42 – 0.51

Race: Hispanic incl. multiple Hisp.

1.18

1.05 – 1.34

Race: Black or African American

1.82

1.58 – 2.09

Race: Other

1.25

1.06 – 1.47

Unsafe school: 1 day

2.17

1.76 – 2.67

Unsafe school: 2 or 3 days

2.90

2.24 – 3.73

Unsafe school: 4 or 5 days

3.46

2.03 – 5.84

Unsafe school: ≥ 6 days

5.80

3.76 – 9.04

Physically active (≥ 60min/d in previous 7 days)

1.05

1.02 – 1.07

Code
# Only a few individuals were aged 12 or 13 years
table(df.yrbs.ad.c.2$fight, df.yrbs.ad.c.2$age_cat)
     
       >14  ≤14
  No  7062  895
  Yes 1894  281

Reference levels

  • Gaming: < 1 hour/day (reference)

  • Age: 12 years (reference)

  • Race: White, non-Hispanic

  • UnsafeSchool: 0 days

  • Active: Not physically active

Code
# Model 5: Subgroup analysis for sex

# Male
glm.model5.male <- glm(
  fight ~ gaming + age + race_cat + unsafe_school + active,
  data = df.yrbs.ad.c %>% 
    filter(sex == "Male"),
  family = binomial)

# Female
glm.model5.female <- glm(
  fight ~ gaming + age + race_cat + unsafe_school + active,
  data = df.yrbs.ad.c %>% 
    filter(sex == "Female"),
  family = binomial)

# Extract adjusted ORs
# Male
glm.model5.ad.male <- parameters::model_parameters(
  glm.model5.male,
  exponentiate = TRUE,
  drop = "Intercept",
  digits = 2)

# Female
glm.model5.ad.female <- parameters::model_parameters(
  glm.model5.female,
  exponentiate = TRUE,
  drop = "Intercept",
  digits = 2)

# Adapt output
glm.model5.ad.male <- glm.model5.ad.male %>%
  mutate(
    `Odds Ratio` = round(Coefficient, 2),
    `95% CI` = paste0(round(CI_low, 2), " – ", round(CI_high, 2)),
    Parameter = case_when(
      Parameter == "gaming≥ 1 hour per day" ~ "Gaming: ≥1 hour per day",
      Parameter == "age" ~ "Age",
      Parameter == "race_catHispanic incl. multiple Hisp." ~ "Race: Hispanic incl. multiple Hisp.",
      Parameter == "race_catBlack or African American" ~ "Race: Black or African American",
      Parameter == "race_catOther" ~ "Race: Other",
      Parameter == "unsafe_school1 day" ~ "Unsafe school: 1 day",
      Parameter == "unsafe_school2 or 3 days" ~ "Unsafe school: 2 or 3 days",
      Parameter == "unsafe_school4 or 5 days" ~ "Unsafe school: 4 or 5 days",
      Parameter == "unsafe_school≥6 days" ~ "Unsafe school: ≥ 6 days",
      Parameter == "active" ~ "Physically active (≥ 60min/d in previous 7 days)",
      TRUE ~ Parameter)) %>%
  select(Parameter, `Odds Ratio`, `95% CI`)

# Female
glm.model5.ad.female <- glm.model5.ad.female %>%
  mutate(
    `Odds Ratio` = round(Coefficient, 2),
    `95% CI` = paste0(round(CI_low, 2), " – ", round(CI_high, 2)),
    Parameter = case_when(
      Parameter == "gaming≥ 1 hour per day" ~ "Gaming: ≥1 hour per day",
      Parameter == "age" ~ "Age",
      Parameter == "race_catHispanic incl. multiple Hisp." ~ "Race: Hispanic incl. multiple Hisp.",
      Parameter == "race_catBlack or African American" ~ "Race: Black or African American",
      Parameter == "race_catOther" ~ "Race: Other",
      Parameter == "unsafe_school1 day" ~ "Unsafe school: 1 day",
      Parameter == "unsafe_school2 or 3 days" ~ "Unsafe school: 2 or 3 days",
      Parameter == "unsafe_school4 or 5 days" ~ "Unsafe school: 4 or 5 days",
      Parameter == "unsafe_school≥6 days" ~ "Unsafe school: ≥ 6 days",
      Parameter == "active" ~ "Physically active (≥ 60min/d in previous 7 days)",
      TRUE ~ Parameter)) %>%
  select(Parameter, `Odds Ratio`, `95% CI`)

# combined model
param_order_sex <- c(
  "Gaming: ≥1 hour per day",
  "Age",
  "Race: Black or African American",
  "Race: Hispanic incl. multiple Hisp.",
  "Race: Other",
  "Physically active (≥ 60min/d in previous 7 days)",
  "Unsafe school: 1 day",
  "Unsafe school: 2 or 3 days",
  "Unsafe school: 4 or 5 days",
  "Unsafe school: ≥ 6 days")

glm.combined.sex.wide <- glm.model5.ad.male %>%
  rename(
    `OR male` = `Odds Ratio`,
    `95% CI male` = `95% CI`) %>%
  left_join(
    glm.model5.ad.female %>%
      rename(
        `OR female` = `Odds Ratio`,
        `95% CI female` = `95% CI`
      ),
    by = "Parameter") %>%
  mutate(
    Parameter = factor(Parameter, levels = param_order_sex)) %>%
  arrange(Parameter)

# Combined table
ft.sex.combined <- flextable(glm.combined.sex.wide) 
ft.sex.combined %>% 
  autofit() %>%
  bold(part = "header") %>%
  align(align = "center", part = "all")

Parameter

OR male

95% CI male

OR female

95% CI female

Gaming: ≥1 hour per day

0.95

0.82 – 1.1

1.03

0.87 – 1.21

Age

0.87

0.82 – 0.92

0.85

0.79 – 0.9

Race: Black or African American

1.42

1.18 – 1.71

2.58

2.08 – 3.18

Race: Hispanic incl. multiple Hisp.

1.03

0.87 – 1.2

1.51

1.25 – 1.82

Race: Other

1.20

0.97 – 1.48

1.32

1.01 – 1.71

Physically active (≥ 60min/d in previous 7 days)

1.08

1.05 – 1.1

0.99

0.96 – 1.02

Unsafe school: 1 day

1.79

1.3 – 2.46

2.46

1.86 – 3.23

Unsafe school: 2 or 3 days

2.91

1.93 – 4.38

3.01

2.15 – 4.17

Unsafe school: 4 or 5 days

2.57

1.2 – 5.4

5.23

2.5 – 10.79

Unsafe school: ≥ 6 days

9.07

4.93 – 17.8

3.03

1.46 – 6.02

Code
table(df.yrbs.ad.c$sex, df.yrbs.ad.c.2$unsafe_school)
        
         0 days 1 day 2 or 3 days 4 or 5 days ≥6 days
  Male     4552   169          98          29      53
  Female   4704   281         179          31      36
Code
param_order <- c(
  "Gaming: ≥1 hour per day",
  "Age",
  "Sex: Female",
  "Race: Black or African American",
  "Race: Hispanic incl. multiple Hisp.",
  "Race: Other",
  "Physically active (≥ 60min/d in previous 7 days)",
  "Unsafe school: 1 day",
  "Unsafe school: 2 or 3 days",
  "Unsafe school: 4 or 5 days",
  "Unsafe school: ≥ 6 days")

glm.combined.wide <- glm.model1.final %>%
  rename(
    `OR Model 1` = `Odds Ratio`,
    `95% CI Model 1` = `95% CI`) %>%
  full_join(
    glm.model2.final %>%
      rename(
        `OR Model 2` = `Odds Ratio`,
        `95% CI Model 2` = `95% CI`
      ),
    by = "Parameter") %>%
  full_join(
    glm.model3.final %>%
      rename(
        `OR Model 3` = `Odds Ratio`,
        `95% CI Model 3` = `95% CI`
      ),
    by = "Parameter") %>% 
  mutate(
    Parameter = factor(Parameter, levels = param_order)) %>%
  arrange(Parameter)

# Combined table
ft.combined <- flextable(glm.combined.wide) 
ft.combined %>% 
  autofit() %>%
  bold(part = "header") %>%
  align(align = "center", part = "all")

Parameter

OR Model 1

95% CI Model 1

OR Model 2

95% CI Model 2

OR Model 3

95% CI Model 3

Gaming: ≥1 hour per day

1.04

0.94 – 1.16

0.95

0.86 – 1.06

0.97

0.87 – 1.09

Age

0.86

0.82 – 0.89

0.86

0.83 – 0.9

Sex: Female

0.45

0.41 – 0.5

0.45

0.41 – 0.5

Race: Black or African American

1.85

1.61 – 2.12

1.83

1.59 – 2.11

Race: Hispanic incl. multiple Hisp.

1.23

1.09 – 1.38

1.18

1.05 – 1.34

Race: Other

1.27

1.09 – 1.49

1.25

1.06 – 1.46

Physically active (≥ 60min/d in previous 7 days)

1.04

1.02 – 1.06

Unsafe school: 1 day

2.15

1.74 – 2.65

Unsafe school: 2 or 3 days

2.91

2.25 – 3.75

Unsafe school: 4 or 5 days

3.61

2.12 – 6.1

Unsafe school: ≥ 6 days

5.67

3.67 – 8.86

Code
ft.combined %>%
  autofit() %>%
  bold(part = "header") %>%
  align(align = "center", part = "all") %>%
  fontsize(size = 8, part = "all") %>%
  font(fontname = "Times New Roman", part = "all") %>%
  set_caption("Table. Association between gaming and fighting (Models 1, 2 and 3)") %>%
  flextable::save_as_docx(path = "../tables/ft.model.combined.docx")

4 Result presentation

Code
df.glm.plot <- glm.combined.wide %>%
  pivot_longer(
    cols = c(`OR Model 1`, `OR Model 2`, `OR Model 3`,
             `95% CI Model 1`, `95% CI Model 2`, `95% CI Model 3`),
    names_to = c(".value", "Model"),
    names_pattern = "(OR|95% CI) (Model [123])") %>%
  separate(
    `95% CI`,
    into = c("CI_low", "CI_high"),
    sep = " – ",
    convert = TRUE) %>%
  rename(
    `Odds Ratio` = OR,
    Variable_Parameter = Parameter) %>%
  mutate(
    Model = factor(Model, levels = c("Model 1", "Model 2", "Model 3")),
    Variable_Parameter = factor(
      Variable_Parameter,
      levels = rev(param_order)))

df.glm.plot <- df.glm.plot %>%
  mutate(
    Variable_Parameter = factor(
      Variable_Parameter,
      levels = rev(param_order)))

# Plot
plot.glm <- ggplot(
  df.glm.plot,
  aes(
    y = Variable_Parameter,
    x = `Odds Ratio`,
    xmin = CI_low,
    xmax = CI_high,
    color = Model)
) +
  geom_vline(xintercept = 1, linetype = "dashed") +
  geom_pointrange(
    position = position_dodge(width = 0.7),
    size = 0.4
  ) +
  scale_x_log10(
    breaks = c(0.25, 0.5, 1, 2, 4)
  ) +
  labs(
    x = "Odds Ratio",
    y = "",
    title = "Association between gaming and fighting"
  ) +
  theme_minimal(base_size = 12) +
  theme(
    legend.position = "bottom",
    axis.text.y = element_text(hjust = 1),
    plot.title.position = "plot",
    plot.title = element_text(hjust = 0))

plot.glm

Code
ggsave(
  filename = "../figures/plot.models.combined.pdf",
  plot = plot.glm,
  width = 10, height = 8, 
  dpi = 1200)
Code
plot.model1 <- ggeffects::predict_response(glm.model1, terms = c("gaming"))
plot(plot.model1)

Code
plot.model2 <- ggeffects::predict_response(glm.model2, terms = c("gaming"))
plot(plot.model2)

Code
plot.model3 <- ggeffects::predict_response(glm.model3, terms = c("gaming"))
plot(plot.model3)

5 Session info

Code
sessionInfo()
R version 4.5.0 (2025-04-11)
Platform: x86_64-apple-darwin20
Running under: macOS Sequoia 15.7.3

Matrix products: default
BLAS:   /Library/Frameworks/R.framework/Versions/4.5-x86_64/Resources/lib/libRblas.0.dylib 
LAPACK: /Library/Frameworks/R.framework/Versions/4.5-x86_64/Resources/lib/libRlapack.dylib;  LAPACK version 3.12.1

locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8

time zone: Europe/Zurich
tzcode source: internal

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
 [1] ggeffects_2.3.2   officer_0.6.10    flextable_0.9.9   parameters_0.28.3
 [5] gtsummary_2.2.0   lubridate_1.9.4   forcats_1.0.0     stringr_1.5.1    
 [9] purrr_1.0.4       readr_2.1.5       tidyr_1.3.1       tibble_3.2.1     
[13] ggplot2_3.5.2     tidyverse_2.0.0   dplyr_1.1.4       rio_1.2.3        
[17] pacman_0.5.1     

loaded via a namespace (and not attached):
 [1] sjlabelled_1.2.0        tidyselect_1.2.1        farver_2.1.2           
 [4] R.utils_2.13.0          fastmap_1.2.0           TH.data_1.1-3          
 [7] fontquiver_0.2.1        bayestestR_0.17.0       digest_0.6.37          
[10] timechange_0.3.0        estimability_1.5.1      lifecycle_1.0.4        
[13] DataExplorer_0.8.3      survival_3.8-3          magrittr_2.0.3         
[16] compiler_4.5.0          rlang_1.1.6             tools_4.5.0            
[19] igraph_2.1.4            yaml_2.3.10             data.table_1.17.8      
[22] knitr_1.50              askpass_1.2.1           labeling_0.4.3         
[25] htmlwidgets_1.6.4       plyr_1.8.9              xml2_1.3.8             
[28] RColorBrewer_1.1-3      multcomp_1.4-28         withr_3.0.2            
[31] R.oo_1.27.1             grid_4.5.0              datawizard_1.3.0       
[34] gdtools_0.4.2           data.tree_1.1.0         xtable_1.8-4           
[37] emmeans_1.11.1          scales_1.4.0            MASS_7.3-65            
[40] insight_1.4.4           cli_3.6.5               mvtnorm_1.3-3          
[43] rmarkdown_2.29          ragg_1.4.0              generics_0.1.4         
[46] rstudioapi_0.17.1       reshape2_1.4.4          tzdb_0.5.0             
[49] splines_4.5.0           parallel_4.5.0          vctrs_0.6.5            
[52] Matrix_1.7-3            sandwich_3.1-1          jsonlite_2.0.0         
[55] fontBitstreamVera_0.1.1 hms_1.1.3               visdat_0.6.0           
[58] systemfonts_1.2.3       glue_1.8.0              codetools_0.2-20       
[61] stringi_1.8.7           gtable_0.3.6            pillar_1.10.2          
[64] htmltools_0.5.8.1       openssl_2.3.3           R6_2.6.1               
[67] networkD3_0.4.1         textshaping_1.0.1       evaluate_1.0.3         
[70] lattice_0.22-7          haven_2.5.5             R.methodsS3_1.8.2      
[73] cards_0.6.0             fontLiberation_0.1.0    Rcpp_1.0.14            
[76] zip_2.3.3               uuid_1.2-1              gridExtra_2.3          
[79] coda_0.19-4.1           xfun_0.52               zoo_1.8-14             
[82] pkgconfig_2.0.3