EpiSmile 1st Grade Latvia Oral Health

Author

IM - SU

Published

May 27, 2026

1 Packages

Code

pacman::p_load(
  tidyverse,
  gtsummary,
  skimr,
  here,
  scales,
  broom,
  sjPlot,
  stringr,
  janitor
)

Code

here::here()

[1] "/home/sergiouribe/Insync/sergio.uribe@gmail.com/Google Drive - Shared with me/Epidemiologijas petijumi/2025 MIH un kariess 6-8_gadi/2025_epiSmile_analysis_1st_grade_latvia"

2 Dataset

3 EDA

Code

# df |> 
#   skimr::skim()

Code

df <- df |>
  select(
    vecums_gados,
    dzimums,
    regions,
    piena_molaru_hipomineralizacija,
    skolas_kods_divi_cipari,
    prevalence_primary_d5,
    prevalence_primary_d3,
    prevalence_primary_d1,
    d3mfs_primary,
    d1mfs_primary,
    d5mfs_primary
  )

Code

df <- df |>
  mutate(
    prevalence_primary_d5 = as.factor(prevalence_primary_d5),
    prevalence_primary_d3 = as.factor(prevalence_primary_d3),
    prevalence_primary_d1 = as.factor(prevalence_primary_d1),
    regions = as.factor(regions),
    dzimums = as.factor(dzimums),
    piena_molaru_hipomineralizacija =
      as.factor(piena_molaru_hipomineralizacija),
    skolas_kods_divi_cipari =
      as.factor(skolas_kods_divi_cipari)
  )

3.1 recoding

Code

df <- df |>
  mutate(
    across(
      starts_with("prevalence_primary_d"),
      ~ case_when(
        str_detect(., "Caries") ~ 1,
        . == "No" ~ 0,
        TRUE ~ NA_real_
      )
    )
  )

3.2 Data cleaning

Code

df <- df |>
  mutate(
    vecums_gados =
      str_trim(vecums_gados) |>
      as.character() |>
      str_replace("^0+", "") |>
      as.numeric()
  )

Code

df <- df |>
  mutate(
    skolas_kods_divi_cipari =
      as.character(skolas_kods_divi_cipari),

    skolas_kods_divi_cipari =
      if_else(
        skolas_kods_divi_cipari == "a",
        "01",
        skolas_kods_divi_cipari
      ),

    skolas_kods_divi_cipari =
      as.factor(skolas_kods_divi_cipari)
  )

4 Models

4.1 Prevalence

Code

model_prevalence <- glm(
  prevalence_primary_d5 ~
    vecums_gados +
    regions +
    dzimums +
    piena_molaru_hipomineralizacija,
  data = df,
  family = binomial(link = "logit")
)

Code

tbl_regression(
  model_prevalence,
  exponentiate = TRUE
) |> 
  add_n(location = "level") |> 
  bold_labels()

Characteristic	N	OR	95% CI	p-value
Vecums (gados)	822	1.12	0.75, 1.70	0.6
Reģions
Pierīga	407	—	—
Rīga	415	1.29	0.95, 1.75	0.10
Dzimums
Meitene	412	—	—
Zēns	410	1.28	0.94, 1.73	0.12
Piena molāru hipomineralizācija
Jā	33	—	—
Nē	789	1.10	0.49, 2.29	0.8
Abbreviations: CI = Confidence Interval, OR = Odds Ratio

Code

plot_model(
  model_prevalence,
  type = "est",
  transform = "exp",
  show.values = TRUE,
  value.offset = 0.3
)

4.2 Severity

Code

model_d5mfs <- glm(
  d5mfs_primary ~
    vecums_gados +
    regions +
    dzimums +
    piena_molaru_hipomineralizacija,
  data = df,
  family = gaussian()
)

Code

tbl_regression(
  model_d5mfs
) |> 
  add_n(location = "level") |> 
  bold_labels()

Characteristic	N	Beta	95% CI	p-value
Vecums (gados)	822	1.9	0.13, 3.7	0.036
Reģions
Pierīga	407	—	—
Rīga	415	0.06	-1.3, 1.4	>0.9
Dzimums
Meitene	412	—	—
Zēns	410	1.6	0.24, 3.0	0.021
Piena molāru hipomineralizācija
Jā	33	—	—
Nē	789	1.4	-2.1, 4.9	0.4
Abbreviation: CI = Confidence Interval

Code

plot_model(
  model_d5mfs,
  type = "est",
  show.values = TRUE,
  value.offset = 0.3
)

5 New analysis 30 May 2026

Dependent: 1.”Prevalence_primary_d5” (binominal) 2. “Prevalence_primary_d3” 3. “Prevalence_primary_d1” Independent: “Vecums (gados)” “Reģions” “Dzimums

5.1 Model Prevalence d5

Code

model_d5 <- glm(
  prevalence_primary_d5 ~ 
    vecums_gados +
    regions +
    dzimums,
  data = df,
  family = binomial(link = "logit")
)

Code

tbl_regression(
  model_d5,
  exponentiate = TRUE
) |> 
  add_n(location = "level") |> 
  bold_labels()

Characteristic	N	OR	95% CI	p-value
Vecums (gados)	822	1.13	0.75, 1.70	0.6
Reģions
Pierīga	407	—	—
Rīga	415	1.29	0.95, 1.75	0.10
Dzimums
Meitene	412	—	—
Zēns	410	1.27	0.94, 1.73	0.12
Abbreviations: CI = Confidence Interval, OR = Odds Ratio

Code

plot_model(
  model_d5,
  type = "est",
  transform = "exp",
  show.values = TRUE,
  value.offset = 0.3
)

Warning: Looks like you are using syntactically invalid variable names, quoted in
  backticks: `Vecums (gados)`. This may result in unexpected behaviour.
  Please rename your variables (e.g., `Vecums..gados.` instead of `Vecums
  (gados)`) and fit the model again.
Warning: Looks like you are using syntactically invalid variable names, quoted in
  backticks: `Vecums (gados)`. This may result in unexpected behaviour.
  Please rename your variables (e.g., `Vecums..gados.` instead of `Vecums
  (gados)`) and fit the model again.

5.2 Model Prevalence d5 with school codes

Code

model_d5_school <- glm(
  prevalence_primary_d5 ~
    vecums_gados +
    regions +
    dzimums +
    skolas_kods_divi_cipari,
  data = df,
  family = binomial(link = "logit")
)

Code

tbl_regression(
  model_d5_school,
  exponentiate = TRUE
) |> 
  add_n(location = "level") |> 
  bold_labels()

Characteristic	N	OR	95% CI	p-value
Vecums (gados)	822	1.24	0.82, 1.92	0.3
Reģions
Pierīga	407	—	—
Rīga	415	0.18	0.01, 2.04	0.2
Dzimums
Meitene	412	—	—
Zēns	410	1.24	0.91, 1.70	0.2
Skolas kods (divi cipari)
01	126	—	—
03	147	1.13	0.68, 1.87	0.6
05	40	3.68	1.45, 11.3	0.011
06	35	0.79	0.37, 1.74	0.6
07	85	10.5	0.89, 248	0.069
08	57	26.1	2.04, 652	0.015
09	101	6.94	0.60, 161	0.13
10	16	6.14	0.45, 160	0.2
12	89	7.59	0.65, 177	0.12
14	62	1.54	0.79, 3.10	0.2
16	27	3.80	0.30, 92.6	0.3
17	37	8.75	0.70, 213	0.10
Abbreviations: CI = Confidence Interval, OR = Odds Ratio

Code

plot_model(
  model_d5_school,
  type = "est",
  transform = "exp",
  show.values = TRUE,
  value.offset = 0.3,
  title = "Prevalence d5 in Primary Teeth, 6 years old children, Latvia, 2026",
  axis.lim = c(0.001, 1000)
)

Warning: Looks like you are using syntactically invalid variable names, quoted in
  backticks: `Skolas kods (divi cipari)`. This may result in unexpected
  behaviour. Please rename your variables (e.g.,
  `Skolas.kods..divi.cipari.` instead of `Skolas kods (divi cipari)`) and
  fit the model again.
Warning: Looks like you are using syntactically invalid variable names, quoted in
  backticks: `Skolas kods (divi cipari)`. This may result in unexpected
  behaviour. Please rename your variables (e.g.,
  `Skolas.kods..divi.cipari.` instead of `Skolas kods (divi cipari)`) and
  fit the model again.

Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
ℹ Please use `linewidth` instead.
ℹ The deprecated feature was likely used in the sjPlot package.
  Please report the issue at <https://github.com/strengejacke/sjPlot/issues>.

Code

plot_model(
  model_d5_school,
  type = "est",
  transform = "exp",
  show.values = TRUE,
  value.offset = 0.3,
  title = "Prevalence d5 in Primary Teeth, 6 years old children, Latvia, 2026"
) +
  scale_y_log10(
    limits = c(0.001, 1000),
    labels = scales::label_log(),
    oob = scales::squish
  )

Warning: Looks like you are using syntactically invalid variable names, quoted in
  backticks: `Skolas kods (divi cipari)`. This may result in unexpected
  behaviour. Please rename your variables (e.g.,
  `Skolas.kods..divi.cipari.` instead of `Skolas kods (divi cipari)`) and
  fit the model again.
Warning: Looks like you are using syntactically invalid variable names, quoted in
  backticks: `Skolas kods (divi cipari)`. This may result in unexpected
  behaviour. Please rename your variables (e.g.,
  `Skolas.kods..divi.cipari.` instead of `Skolas kods (divi cipari)`) and
  fit the model again.

Scale for y is already present.
Adding another scale for y, which will replace the existing scale.

Code

plot_model(
  model_d5_school,
  type = "est",
  transform = "exp",
  show.values = TRUE,
  value.offset = 0.3,
  colors = c("#EC3D49", "#4B0023", "#c8c8dc")
) +
  scale_y_log10(
    limits = c(0.001, 1000),
    labels = scales::label_log(),
    oob = scales::squish
  )

Warning: Looks like you are using syntactically invalid variable names, quoted in
  backticks: `Skolas kods (divi cipari)`. This may result in unexpected
  behaviour. Please rename your variables (e.g.,
  `Skolas.kods..divi.cipari.` instead of `Skolas kods (divi cipari)`) and
  fit the model again.
Warning: Looks like you are using syntactically invalid variable names, quoted in
  backticks: `Skolas kods (divi cipari)`. This may result in unexpected
  behaviour. Please rename your variables (e.g.,
  `Skolas.kods..divi.cipari.` instead of `Skolas kods (divi cipari)`) and
  fit the model again.

Scale for y is already present.
Adding another scale for y, which will replace the existing scale.

Code

ggsave(
  filename = "plot_prevalence_d5_school.png",
  # plot = p_d5,
  width = 20,
  height = 16,
  units = "cm",
  dpi = 300
)

5.3 Model Prevalence d3

Code

model_d3 <- glm(
  prevalence_primary_d3 ~
    vecums_gados +
    regions +
    dzimums,
  data = df,
  family = binomial(link = "logit")
)

Code

tbl_regression(
  model_d3,
  exponentiate = TRUE
) |> 
  add_n(location = "level") |> 
  bold_labels()

Characteristic	N	OR	95% CI	p-value
Vecums (gados)	822	1.07	0.69, 1.69	0.8
Reģions
Pierīga	407	—	—
Rīga	415	1.35	0.97, 1.89	0.078
Dzimums
Meitene	412	—	—
Zēns	410	1.27	0.91, 1.78	0.2
Abbreviations: CI = Confidence Interval, OR = Odds Ratio

Code

plot_model(
  model_d3,
  type = "est",
  transform = "exp",
  show.values = TRUE,
  value.offset = 0.3
)

Warning: Looks like you are using syntactically invalid variable names, quoted in
  backticks: `Vecums (gados)`. This may result in unexpected behaviour.
  Please rename your variables (e.g., `Vecums..gados.` instead of `Vecums
  (gados)`) and fit the model again.
Warning: Looks like you are using syntactically invalid variable names, quoted in
  backticks: `Vecums (gados)`. This may result in unexpected behaviour.
  Please rename your variables (e.g., `Vecums..gados.` instead of `Vecums
  (gados)`) and fit the model again.

5.4 Model Prevalence d1

Code

model_d1 <- glm(
  prevalence_primary_d1 ~
    vecums_gados +
    regions +
    dzimums,
  data = df,
  family = binomial(link = "logit")
)

Code

tbl_regression(
  model_d1,
  exponentiate = TRUE
) |> 
  add_n(location = "level") |> 
  bold_labels()

Characteristic	N	OR	95% CI	p-value
Vecums (gados)	822	1.09	0.60, 2.12	0.8
Reģions
Pierīga	407	—	—
Rīga	415	1.44	0.90, 2.32	0.13
Dzimums
Meitene	412	—	—
Zēns	410	1.12	0.70, 1.79	0.6
Abbreviations: CI = Confidence Interval, OR = Odds Ratio

Code

plot_model(
  model_d1,
  type = "est",
  transform = "exp",
  show.values = TRUE,
  value.offset = 0.3
)

Warning: Looks like you are using syntactically invalid variable names, quoted in
  backticks: `Vecums (gados)`. This may result in unexpected behaviour.
  Please rename your variables (e.g., `Vecums..gados.` instead of `Vecums
  (gados)`) and fit the model again.
Warning: Looks like you are using syntactically invalid variable names, quoted in
  backticks: `Vecums (gados)`. This may result in unexpected behaviour.
  Please rename your variables (e.g., `Vecums..gados.` instead of `Vecums
  (gados)`) and fit the model again.