EpiSmile 1st Grade Latvia Oral Health

Author

IM - SU

Published

May 27, 2026

1 Packages

Code
pacman::p_load(
  tidyverse,
  gtsummary,
  skimr,
  here,
  scales,
  broom,
  sjPlot,
  stringr,
  janitor
)
Code
here::here()
[1] "/home/sergiouribe/Insync/sergio.uribe@gmail.com/Google Drive - Shared with me/Epidemiologijas petijumi/2025 MIH un kariess 6-8_gadi/2025_epiSmile_analysis_1st_grade_latvia"

2 Dataset

3 EDA

Code
# df |> 
#   skimr::skim()
Code
df <- df |>
  select(
    vecums_gados,
    dzimums,
    regions,
    piena_molaru_hipomineralizacija,
    skolas_kods_divi_cipari,
    prevalence_primary_d5,
    prevalence_primary_d3,
    prevalence_primary_d1,
    d3mfs_primary,
    d1mfs_primary,
    d5mfs_primary
  )
Code
df <- df |>
  mutate(
    prevalence_primary_d5 = as.factor(prevalence_primary_d5),
    prevalence_primary_d3 = as.factor(prevalence_primary_d3),
    prevalence_primary_d1 = as.factor(prevalence_primary_d1),
    regions = as.factor(regions),
    dzimums = as.factor(dzimums),
    piena_molaru_hipomineralizacija =
      as.factor(piena_molaru_hipomineralizacija),
    skolas_kods_divi_cipari =
      as.factor(skolas_kods_divi_cipari)
  )

3.1 recoding

Code
df <- df |>
  mutate(
    across(
      starts_with("prevalence_primary_d"),
      ~ case_when(
        str_detect(., "Caries") ~ 1,
        . == "No" ~ 0,
        TRUE ~ NA_real_
      )
    )
  )

3.2 Data cleaning

Code
df <- df |>
  mutate(
    vecums_gados =
      str_trim(vecums_gados) |>
      as.character() |>
      str_replace("^0+", "") |>
      as.numeric()
  )
Code
df <- df |>
  mutate(
    skolas_kods_divi_cipari =
      as.character(skolas_kods_divi_cipari),

    skolas_kods_divi_cipari =
      if_else(
        skolas_kods_divi_cipari == "a",
        "01",
        skolas_kods_divi_cipari
      ),

    skolas_kods_divi_cipari =
      as.factor(skolas_kods_divi_cipari)
  )

4 Models

4.1 Prevalence

Code
model_prevalence <- glm(
  prevalence_primary_d5 ~
    vecums_gados +
    regions +
    dzimums +
    piena_molaru_hipomineralizacija,
  data = df,
  family = binomial(link = "logit")
)
Code
tbl_regression(
  model_prevalence,
  exponentiate = TRUE
) |> 
  add_n(location = "level") |> 
  bold_labels()
Characteristic N OR 95% CI p-value
Vecums (gados) 822 1.12 0.75, 1.70 0.6
Reģions



    Pierīga 407
    Rīga 415 1.29 0.95, 1.75 0.10
Dzimums



    Meitene 412
    Zēns 410 1.28 0.94, 1.73 0.12
Piena molāru hipomineralizācija



    Jā 33
    Nē 789 1.10 0.49, 2.29 0.8
Abbreviations: CI = Confidence Interval, OR = Odds Ratio
Code
plot_model(
  model_prevalence,
  type = "est",
  transform = "exp",
  show.values = TRUE,
  value.offset = 0.3
)

4.2 Severity

Code
model_d5mfs <- glm(
  d5mfs_primary ~
    vecums_gados +
    regions +
    dzimums +
    piena_molaru_hipomineralizacija,
  data = df,
  family = gaussian()
)
Code
tbl_regression(
  model_d5mfs
) |> 
  add_n(location = "level") |> 
  bold_labels()
Characteristic N Beta 95% CI p-value
Vecums (gados) 822 1.9 0.13, 3.7 0.036
Reģions



    Pierīga 407
    Rīga 415 0.06 -1.3, 1.4 >0.9
Dzimums



    Meitene 412
    Zēns 410 1.6 0.24, 3.0 0.021
Piena molāru hipomineralizācija



    Jā 33
    Nē 789 1.4 -2.1, 4.9 0.4
Abbreviation: CI = Confidence Interval
Code
plot_model(
  model_d5mfs,
  type = "est",
  show.values = TRUE,
  value.offset = 0.3
)

5 New analysis 30 May 2026

Dependent: 1.”Prevalence_primary_d5” (binominal) 2. “Prevalence_primary_d3” 3. “Prevalence_primary_d1” Independent: “Vecums (gados)” “Reģions” “Dzimums

5.1 Model Prevalence d5

Code
model_d5 <- glm(
  prevalence_primary_d5 ~ 
    vecums_gados +
    regions +
    dzimums,
  data = df,
  family = binomial(link = "logit")
)
Code
tbl_regression(
  model_d5,
  exponentiate = TRUE
) |> 
  add_n(location = "level") |> 
  bold_labels()
Characteristic N OR 95% CI p-value
Vecums (gados) 822 1.13 0.75, 1.70 0.6
Reģions



    Pierīga 407
    Rīga 415 1.29 0.95, 1.75 0.10
Dzimums



    Meitene 412
    Zēns 410 1.27 0.94, 1.73 0.12
Abbreviations: CI = Confidence Interval, OR = Odds Ratio
Code
plot_model(
  model_d5,
  type = "est",
  transform = "exp",
  show.values = TRUE,
  value.offset = 0.3
)
Warning: Looks like you are using syntactically invalid variable names, quoted in
  backticks: `Vecums (gados)`. This may result in unexpected behaviour.
  Please rename your variables (e.g., `Vecums..gados.` instead of `Vecums
  (gados)`) and fit the model again.
Warning: Looks like you are using syntactically invalid variable names, quoted in
  backticks: `Vecums (gados)`. This may result in unexpected behaviour.
  Please rename your variables (e.g., `Vecums..gados.` instead of `Vecums
  (gados)`) and fit the model again.

5.2 Model Prevalence d5 with school codes

Code
model_d5_school <- glm(
  prevalence_primary_d5 ~
    vecums_gados +
    regions +
    dzimums +
    skolas_kods_divi_cipari,
  data = df,
  family = binomial(link = "logit")
)
Code
tbl_regression(
  model_d5_school,
  exponentiate = TRUE
) |> 
  add_n(location = "level") |> 
  bold_labels()
Characteristic N OR 95% CI p-value
Vecums (gados) 822 1.24 0.82, 1.92 0.3
Reģions



    Pierīga 407
    Rīga 415 0.18 0.01, 2.04 0.2
Dzimums



    Meitene 412
    Zēns 410 1.24 0.91, 1.70 0.2
Skolas kods (divi cipari)



    01 126
    03 147 1.13 0.68, 1.87 0.6
    05 40 3.68 1.45, 11.3 0.011
    06 35 0.79 0.37, 1.74 0.6
    07 85 10.5 0.89, 248 0.069
    08 57 26.1 2.04, 652 0.015
    09 101 6.94 0.60, 161 0.13
    10 16 6.14 0.45, 160 0.2
    12 89 7.59 0.65, 177 0.12
    14 62 1.54 0.79, 3.10 0.2
    16 27 3.80 0.30, 92.6 0.3
    17 37 8.75 0.70, 213 0.10
Abbreviations: CI = Confidence Interval, OR = Odds Ratio
Code
plot_model(
  model_d5_school,
  type = "est",
  transform = "exp",
  show.values = TRUE,
  value.offset = 0.3,
  title = "Prevalence d5 in Primary Teeth, 6 years old children, Latvia, 2026",
  axis.lim = c(0.001, 1000)
)
Warning: Looks like you are using syntactically invalid variable names, quoted in
  backticks: `Skolas kods (divi cipari)`. This may result in unexpected
  behaviour. Please rename your variables (e.g.,
  `Skolas.kods..divi.cipari.` instead of `Skolas kods (divi cipari)`) and
  fit the model again.
Warning: Looks like you are using syntactically invalid variable names, quoted in
  backticks: `Skolas kods (divi cipari)`. This may result in unexpected
  behaviour. Please rename your variables (e.g.,
  `Skolas.kods..divi.cipari.` instead of `Skolas kods (divi cipari)`) and
  fit the model again.
Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
ℹ Please use `linewidth` instead.
ℹ The deprecated feature was likely used in the sjPlot package.
  Please report the issue at <https://github.com/strengejacke/sjPlot/issues>.

Code
plot_model(
  model_d5_school,
  type = "est",
  transform = "exp",
  show.values = TRUE,
  value.offset = 0.3,
  title = "Prevalence d5 in Primary Teeth, 6 years old children, Latvia, 2026"
) +
  scale_y_log10(
    limits = c(0.001, 1000),
    labels = scales::label_log(),
    oob = scales::squish
  )
Warning: Looks like you are using syntactically invalid variable names, quoted in
  backticks: `Skolas kods (divi cipari)`. This may result in unexpected
  behaviour. Please rename your variables (e.g.,
  `Skolas.kods..divi.cipari.` instead of `Skolas kods (divi cipari)`) and
  fit the model again.
Warning: Looks like you are using syntactically invalid variable names, quoted in
  backticks: `Skolas kods (divi cipari)`. This may result in unexpected
  behaviour. Please rename your variables (e.g.,
  `Skolas.kods..divi.cipari.` instead of `Skolas kods (divi cipari)`) and
  fit the model again.
Scale for y is already present.
Adding another scale for y, which will replace the existing scale.

Code
plot_model(
  model_d5_school,
  type = "est",
  transform = "exp",
  show.values = TRUE,
  value.offset = 0.3,
  colors = c("#EC3D49", "#4B0023", "#c8c8dc")
) +
  scale_y_log10(
    limits = c(0.001, 1000),
    labels = scales::label_log(),
    oob = scales::squish
  )
Warning: Looks like you are using syntactically invalid variable names, quoted in
  backticks: `Skolas kods (divi cipari)`. This may result in unexpected
  behaviour. Please rename your variables (e.g.,
  `Skolas.kods..divi.cipari.` instead of `Skolas kods (divi cipari)`) and
  fit the model again.
Warning: Looks like you are using syntactically invalid variable names, quoted in
  backticks: `Skolas kods (divi cipari)`. This may result in unexpected
  behaviour. Please rename your variables (e.g.,
  `Skolas.kods..divi.cipari.` instead of `Skolas kods (divi cipari)`) and
  fit the model again.
Scale for y is already present.
Adding another scale for y, which will replace the existing scale.

Code
ggsave(
  filename = "plot_prevalence_d5_school.png",
  # plot = p_d5,
  width = 20,
  height = 16,
  units = "cm",
  dpi = 300
)

5.3 Model Prevalence d3

Code
model_d3 <- glm(
  prevalence_primary_d3 ~
    vecums_gados +
    regions +
    dzimums,
  data = df,
  family = binomial(link = "logit")
)
Code
tbl_regression(
  model_d3,
  exponentiate = TRUE
) |> 
  add_n(location = "level") |> 
  bold_labels()
Characteristic N OR 95% CI p-value
Vecums (gados) 822 1.07 0.69, 1.69 0.8
Reģions



    Pierīga 407
    Rīga 415 1.35 0.97, 1.89 0.078
Dzimums



    Meitene 412
    Zēns 410 1.27 0.91, 1.78 0.2
Abbreviations: CI = Confidence Interval, OR = Odds Ratio
Code
plot_model(
  model_d3,
  type = "est",
  transform = "exp",
  show.values = TRUE,
  value.offset = 0.3
)
Warning: Looks like you are using syntactically invalid variable names, quoted in
  backticks: `Vecums (gados)`. This may result in unexpected behaviour.
  Please rename your variables (e.g., `Vecums..gados.` instead of `Vecums
  (gados)`) and fit the model again.
Warning: Looks like you are using syntactically invalid variable names, quoted in
  backticks: `Vecums (gados)`. This may result in unexpected behaviour.
  Please rename your variables (e.g., `Vecums..gados.` instead of `Vecums
  (gados)`) and fit the model again.

5.4 Model Prevalence d1

Code
model_d1 <- glm(
  prevalence_primary_d1 ~
    vecums_gados +
    regions +
    dzimums,
  data = df,
  family = binomial(link = "logit")
)
Code
tbl_regression(
  model_d1,
  exponentiate = TRUE
) |> 
  add_n(location = "level") |> 
  bold_labels()
Characteristic N OR 95% CI p-value
Vecums (gados) 822 1.09 0.60, 2.12 0.8
Reģions



    Pierīga 407
    Rīga 415 1.44 0.90, 2.32 0.13
Dzimums



    Meitene 412
    Zēns 410 1.12 0.70, 1.79 0.6
Abbreviations: CI = Confidence Interval, OR = Odds Ratio
Code
plot_model(
  model_d1,
  type = "est",
  transform = "exp",
  show.values = TRUE,
  value.offset = 0.3
)
Warning: Looks like you are using syntactically invalid variable names, quoted in
  backticks: `Vecums (gados)`. This may result in unexpected behaviour.
  Please rename your variables (e.g., `Vecums..gados.` instead of `Vecums
  (gados)`) and fit the model again.
Warning: Looks like you are using syntactically invalid variable names, quoted in
  backticks: `Vecums (gados)`. This may result in unexpected behaviour.
  Please rename your variables (e.g., `Vecums..gados.` instead of `Vecums
  (gados)`) and fit the model again.