Packages

pacman::p_load(tidyverse, 
               tidymodels, 
               survey, # for cluster effects
               kableExtra, 
               htmlTable, # to save table1 objects as html
               here, 
               scales, 
               viridis, 
               ggmosaic, 
               gtsummary,
               gt, 
               performance, 
               modelsummary, 
               report, 
               patchwork, 
               janitor, 
               emmeans, 
               multicomp, # for some reason, is necessary to install the sjPlot
               sjstats, 
               sjPlot, 
               lmtest, # for Beta Regression for Percent and Proportion Data, check https://rcompanion.org/handbook/J_02.html
               betareg, 
               ggalluvial # alluvial plots
               )
theme_set(theme_minimal())
# set_theme()

Dataset

# data_consolidated <- read_csv(here("data", "data_consolidated.csv"))
data_consolidated <- read_rds(here("data", "data_consolidated.rds"))

EDA

table(data_consolidated$time) %>% 
  knitr::kable()
Var1 Freq
baseline 427
final 373

Should be 373

Check repeated kids

data_consolidated %>% 
  count(berna_kods) %>% 
  filter(n > 2)
## # A tibble: 0 × 2
## # … with 2 variables: berna_kods <dbl>, n <int>

Check children in the baseline but not in the final

data_consolidated %>% 
  count(berna_kods) %>% 
  filter(n < 2) %>% 
  .$berna_kods
##  [1]  28  36  43  51  54  63  69  80  87  91  95  99 117 119 120 124 125 134 138
## [20] 141 144 151 156 159 179 188 194 201 206 223 237 267 281 288 289 292 294 296
## [39] 300 323 324 325 329 335 339 346 368 378 389 393 396 401 407 414 418 423

TABLE 1

table1 <- data_consolidated %>%
  filter(time == "baseline") %>% 
  # mutate(number_of_arrested_lesions = as.numeric(number_of_arrested_lesions)) %>% 
  # mutate(age_years = as.integer(age_years)) %>%
  select(
    age_months,
    # age_years,
    dzimums,
    # time,
    # komplikacijas_minimalas,
    # komplikacijas_nopietnas,
    grupas_krasa,
    # zobu_tirisanas_biezums, 
    # zobu_pasta, 
    
    toothbrushing_freq = zobu_tirisanas_biezums,
    toothpaste = zobu_pasta,
    sweets_daily = saldumi_ikdiena,
    plaque = aplikums,
    sugary_drinks = sulas_tejas_kakao_pieni_u_c_ukuru_saturosi_dzerieni_katru_dienu_vai_gandriz_katru_dienu,
    
    d1mft,
    d3mft, 

    # number_of_arrested_lesions, 
    number_of_active_lesions
    
  )
  # table
  # gtsummary::tbl_summary(
  #  by = grupas_krasa,
    
    # table specifications
    # type = all_continuous() ~ "continuous2",
    # statistic = all_continuous() ~ "{mean} ({sd})",
  #  missing = "no"
  #) %>% 
  # table adorns
  #gtsummary::modify_caption("**Table 1. Baseline Patient Characteristics**") %>% 
  # gtsummary::add_overall() %>% 
  #gtsummary::bold_labels() 
table1 <- table1::table1(
  ~  age_months  +
    dzimums +
    toothbrushing_freq +
    toothpaste +
    sweets_daily +
    plaque +
    sugary_drinks +
    d1mft +
    d3mft +
    number_of_active_lesions
  | grupas_krasa,
  data = table1, 
  type = c("simple", "condensed")
) 

Table 1 exported to HTML

table1 %>%
  kbl() %>%
  kable_classic(full_width = F) %>%
  kable_styling(
    bootstrap_options = c("striped", "hover"),
    full_width = FALSE,
    position = "left"
  ) %>%
  column_spec(1, bold = TRUE) %>%
  readr::write_file(., here("tables", "table_01.html"))

Table 1 with p values

data_consolidated %>%
  filter(time == "baseline") %>% 
  select(
    age_months,
    dzimums,
    grupas_krasa,
    toothbrushing_freq = zobu_tirisanas_biezums,
    toothpaste = zobu_pasta,
    sweets_daily = saldumi_ikdiena,
    plaque = aplikums,
    sugary_drinks = sulas_tejas_kakao_pieni_u_c_ukuru_saturosi_dzerieni_katru_dienu_vai_gandriz_katru_dienu,
    d1mft,
    d3mft, 
    number_of_active_lesions
    
  ) %>% 
  gtsummary::tbl_summary(by = grupas_krasa) %>% 
  gtsummary::add_p()
Characteristic Balta, N = 701 Dzeltena, N = 701 Lillā, N = 721 Rozā, N = 711 Zaļa, N = 721 Zila, N = 721 p-value2
Age(months) 44 (37, 55) 49 (39, 59) 46 (36, 59) 44 (36, 52) 46 (32, 58) 46 (39, 54) 0.8
Gender 0.7
    Meitene 30 (43%) 31 (44%) 35 (49%) 26 (37%) 28 (39%) 28 (39%)
    Zēns 40 (57%) 39 (56%) 37 (51%) 45 (63%) 44 (61%) 44 (61%)
Toothbrushing frequency
    Divas un vairākas reizes dienā 13 (19%) 23 (33%) 13 (18%) 15 (21%) 25 (35%) 17 (24%)
    Vienu reizi dienā - vakarā 35 (50%) 29 (41%) 38 (53%) 39 (55%) 24 (33%) 35 (49%)
    Reizi dienā no rīta vai retāk kā reizi dienā 20 (29%) 16 (23%) 19 (26%) 16 (23%) 17 (24%) 17 (24%)
    Netīra 2 (2.9%) 2 (2.9%) 2 (2.8%) 1 (1.4%) 6 (8.3%) 3 (4.2%)
toothpaste
    bez 0 (0%) 0 (0%) 0 (0%) 0 (0%) 1 (1.4%) 0 (0%)
    Bez fluorīdiem 1 (1.4%) 0 (0%) 2 (2.8%) 3 (4.2%) 1 (1.4%) 3 (4.2%)
    Fluorīdi līdz 1000 ppm 43 (61%) 42 (60%) 45 (62%) 35 (49%) 39 (54%) 34 (47%)
    Fluorīdi vismaz 1000 ppm 26 (37%) 28 (40%) 25 (35%) 32 (45%) 29 (40%) 34 (47%)
    maina 0 (0%) 0 (0%) 0 (0%) 0 (0%) 1 (1.4%) 0 (0%)
    nezina 0 (0%) 0 (0%) 0 (0%) 1 (1.4%) 0 (0%) 1 (1.4%)
    vispār nelieto 0 (0%) 0 (0%) 0 (0%) 0 (0%) 1 (1.4%) 0 (0%)
sweets_daily 0.12
    Jā 63 (90%) 59 (84%) 54 (75%) 63 (89%) 62 (86%) 57 (79%)
    Nē 7 (10%) 11 (16%) 18 (25%) 8 (11%) 10 (14%) 15 (21%)
Plaque 0.018
    Nav redzams 19 (27%) 29 (41%) 21 (29%) 25 (35%) 21 (29%) 37 (51%)
    Redzams 51 (73%) 41 (59%) 51 (71%) 46 (65%) 51 (71%) 35 (49%)
sugary_drinks 0.2
    Jā 47 (67%) 47 (67%) 49 (68%) 54 (76%) 41 (57%) 53 (74%)
    Nē 23 (33%) 23 (33%) 23 (32%) 17 (24%) 31 (43%) 19 (26%)
d1mft 10.0 (7.0, 14.0) 9.0 (6.0, 12.0) 8.0 (4.0, 12.2) 10.0 (7.0, 13.0) 6.0 (3.0, 8.0) 8.0 (5.0, 10.0) <0.001
d3mft 6.0 (4.0, 11.0) 6.0 (3.0, 9.0) 6.5 (3.0, 9.0) 7.0 (5.0, 10.0) 4.0 (2.0, 7.0) 5.0 (3.8, 8.0) 0.002
Active lesions at baseline 8.5 (6.0, 12.0) 6.0 (3.2, 10.0) 5.5 (4.0, 9.2) 8.0 (6.0, 11.0) 5.0 (2.8, 7.2) 5.5 (2.8, 9.0) <0.001
1 Median (IQR); n (%)
2 Kruskal-Wallis rank sum test; Pearson's Chi-squared test
data_consolidated %>%
  filter(time == "baseline") %>% 
  select(
    toothbrushing_freq = zobu_tirisanas_biezums,
    toothpaste = zobu_pasta,

    plaque = aplikums,
    sugary_drinks = sulas_tejas_kakao_pieni_u_c_ukuru_saturosi_dzerieni_katru_dienu_vai_gandriz_katru_dienu, 
    grupas_krasa
  ) %>% 
  gtsummary::tbl_summary(by = grupas_krasa) %>% 
  gtsummary::add_p()
Characteristic Balta, N = 701 Dzeltena, N = 701 Lillā, N = 721 Rozā, N = 711 Zaļa, N = 721 Zila, N = 721 p-value2
Toothbrushing frequency
    Divas un vairākas reizes dienā 13 (19%) 23 (33%) 13 (18%) 15 (21%) 25 (35%) 17 (24%)
    Vienu reizi dienā - vakarā 35 (50%) 29 (41%) 38 (53%) 39 (55%) 24 (33%) 35 (49%)
    Reizi dienā no rīta vai retāk kā reizi dienā 20 (29%) 16 (23%) 19 (26%) 16 (23%) 17 (24%) 17 (24%)
    Netīra 2 (2.9%) 2 (2.9%) 2 (2.8%) 1 (1.4%) 6 (8.3%) 3 (4.2%)
toothpaste
    bez 0 (0%) 0 (0%) 0 (0%) 0 (0%) 1 (1.4%) 0 (0%)
    Bez fluorīdiem 1 (1.4%) 0 (0%) 2 (2.8%) 3 (4.2%) 1 (1.4%) 3 (4.2%)
    Fluorīdi līdz 1000 ppm 43 (61%) 42 (60%) 45 (62%) 35 (49%) 39 (54%) 34 (47%)
    Fluorīdi vismaz 1000 ppm 26 (37%) 28 (40%) 25 (35%) 32 (45%) 29 (40%) 34 (47%)
    maina 0 (0%) 0 (0%) 0 (0%) 0 (0%) 1 (1.4%) 0 (0%)
    nezina 0 (0%) 0 (0%) 0 (0%) 1 (1.4%) 0 (0%) 1 (1.4%)
    vispār nelieto 0 (0%) 0 (0%) 0 (0%) 0 (0%) 1 (1.4%) 0 (0%)
Plaque 0.018
    Nav redzams 19 (27%) 29 (41%) 21 (29%) 25 (35%) 21 (29%) 37 (51%)
    Redzams 51 (73%) 41 (59%) 51 (71%) 46 (65%) 51 (71%) 35 (49%)
sugary_drinks 0.2
    Jā 47 (67%) 47 (67%) 49 (68%) 54 (76%) 41 (57%) 53 (74%)
    Nē 23 (33%) 23 (33%) 23 (32%) 17 (24%) 31 (43%) 19 (26%)
1 n (%)
2 Pearson's Chi-squared test

explore the imabalance between groups

d1mft

data_consolidated %>%
  filter(time == "baseline") %>% 
  ggplot(aes(x = fct_reorder(grupas_krasa, d1mft),  
             y = d1mft)) + 
  geom_violin() + 
  geom_boxplot(width = 0.2) + 
    geom_jitter(alpha = 0.1, width = 0.1) +
  labs(title = "d1mft")

#### d1mft

data_consolidated %>%
  filter(time == "baseline") %>% 
  ggplot(aes(x = fct_reorder(grupas_krasa, d3mft),  
             y = d3mft)) + 
  geom_violin() + 
  geom_boxplot(width = 0.2) + 
    geom_jitter(alpha = 0.1, width = 0.1) +
  labs(title = "d3mft")

data_consolidated %>% 
  # select
  filter(time == "baseline") %>% 
  select(grupas_krasa, d1mft, d3mft, number_of_active_lesions_baseline ) %>% 
  # reshape
  pivot_longer(cols = c( d1mft, d3mft), names_to = "variable", 
               values_to = "values") %>% 
  ggplot(aes(x = fct_reorder(grupas_krasa, rev(values)),  
             y = values, 
             fill = variable)) +
  geom_boxplot()

data_consolidated %>% 
  # select
  filter(time == "baseline") %>% 
  select(grupas_krasa, number_of_active_lesions_baseline ) %>% 
  ggplot(aes(x = fct_reorder(grupas_krasa,number_of_active_lesions_baseline),  
             y = number_of_active_lesions_baseline)) + 
  geom_boxplot()

Unique patients per time

Unique patients

n_distinct(data_consolidated$berna_kods) %>% 
  knitr::kable()
x
428

Patients per time

data_consolidated %>% 
  tabyl(time) %>% 
  knitr::kable()
time n percent
baseline 427 0.53375
final 373 0.46625

age patients

data_consolidated <- data_consolidated %>% 
  group_by(time) %>% 
  mutate(mean_age_months = mean(age_months, na.rm = T))
data_consolidated %>% 
  ggplot(aes(x = age_months, 
             fill = time)) + 
  geom_histogram(bins = 9, 
                 show.legend = FALSE) + 
  facet_grid(time ~ . ) + 
  geom_vline(aes(xintercept = mean_age_months, group = time), colour = 'grey50') + 
  labs(title = "Age in months") 

Loss to follow-up

data_consolidated %>%
  tabyl(grupas_krasa, time) %>%
  adorn_totals(where = "row") %>% 
  mutate(LOSS = baseline - final) %>% 
  mutate(LOSS_PCT = (baseline - final) / baseline *100 ) %>% 
  adorn_rounding() %>% 
  knitr::kable()
grupas_krasa baseline final LOSS LOSS_PCT
Balta 70 57 13 18.6
Dzeltena 70 58 12 17.1
Lillā 72 61 11 15.3
Rozā 71 66 5 7.0
Zaļa 72 65 7 9.7
Zila 72 66 6 8.3
Total 427 373 54 12.6
data_consolidated %>%
  group_by(time, grupas_krasa) %>%
  count() %>% 
  ggplot(aes(fill = time, 
             y = n, 
             x = grupas_krasa)) + 
  geom_bar(position="dodge", stat="identity") + 
  labs(title = "Participants per group", 
       x = "Group", 
       fill = "Time") + 
  theme(legend.position = "bottom")

data_consolidated %>%
  tabyl(grupas_krasa, time) %>%
  mutate(LOSS = baseline - final) %>%
  mutate(LOSS_PCT = (baseline - final) / baseline * 100) %>%
  adorn_rounding() %>%
  ggplot(aes(x = fct_reorder(grupas_krasa, LOSS_PCT),
             y = LOSS_PCT)) +
  geom_col() +
  labs(title = "Loss to follow-up in percentage",
       y = "Percentage",
       x = "Group") +
  ylim(0, 100) +
  geom_hline(yintercept = 30,
             linetype = "dashed",
             color = "grey") +
  geom_text(aes(x = 5.5, y = 32,
                label = "Loss to follow-up threshold = 30%"), 
            size = 3, color = "grey")

COMPLICATION ANALYSIS

Major complications per group

Detail

data_consolidated %>%
  # filter only final
  filter(time == "final") %>%
  # count by complications
  select(grupas_krasa, komplikacijas_nopietnas) %>%
  group_by(grupas_krasa, komplikacijas_nopietnas) %>%
  tabyl(grupas_krasa, komplikacijas_nopietnas) %>%
  adorn_totals(where = "row") %>% 
  adorn_percentages("row") %>%
  adorn_pct_formatting(digits = 2) %>%
  adorn_ns() %>% 
  knitr::kable()
grupas_krasa
Balta 50.88% (29) 49.12% (28)
Dzeltena 48.28% (28) 51.72% (30)
Lillā 49.18% (30) 50.82% (31)
Rozā 45.45% (30) 54.55% (36)
Zaļa 21.54% (14) 78.46% (51)
Zila 53.03% (35) 46.97% (31)
Total 44.50% (166) 55.50% (207)

Percentage of complications

data_consolidated %>%
  # filter only final
  filter(time == "final") %>%
  # count by complications
  select(grupas_krasa, komplikacijas_nopietnas) %>%
  group_by(grupas_krasa, komplikacijas_nopietnas) %>%
  tabyl(grupas_krasa, komplikacijas_nopietnas) %>% 
  mutate(MajorComplicationsPct = Jā / (Jā + Nē) * 100) %>% 
  mutate(error_estimate = sqrt(MajorComplicationsPct)) %>% 
  adorn_rounding() %>% 
  knitr::kable()
grupas_krasa MajorComplicationsPct error_estimate
Balta 29 28 50.9 7.1
Dzeltena 28 30 48.3 6.9
Lillā 30 31 49.2 7.0
Rozā 30 36 45.5 6.7
Zaļa 14 51 21.5 4.6
Zila 35 31 53.0 7.3

A good estimate of the standard deviation of the Poisson distribution is the square root of the count. Check this.

Test differences of MAJOR complications between groups

set the reference level for the response variable

data_consolidated <- data_consolidated %>% 
  # set response variable
  mutate(komplikacijas_nopietnas = fct_relevel(komplikacijas_nopietnas, "Nē")) %>% 
  # set baseline group
  mutate(grupas_krasa = fct_relevel(grupas_krasa, "Lillā"))

H0: The proportion of cases is the same in each group: p1 = p2 = p3 = p4 = p5 = p6 Ha: The proportion of cases is not the same in each group: at least one pi is different from the others

Test using the prop.test

data_consolidated %>%
  # filter the data
  # filter only final
  filter(time == "final") %>%
  # count by complications
  select(group = grupas_krasa, 
         Major_complications = komplikacijas_nopietnas) %>% 
  mutate(Major_complications = fct_recode(Major_complications, 
                                           "Yes" = "Jā", 
                                           "No" =  "Nē")) %>% 
  # create the table for the prop test
  group_by(group, Major_complications) %>%
  tabyl(group, Major_complications) %>% 
  mutate(total = Yes + No)  %>% 
  # the prop test
  with(prop.test(Yes, total))
## 
##  6-sample test for equality of proportions without continuity
##  correction
## 
## data:  Yes out of total
## X-squared = 17.659, df = 5, p-value = 0.003406
## alternative hypothesis: two.sided
## sample estimates:
##    prop 1    prop 2    prop 3    prop 4    prop 5    prop 6 
## 0.4918033 0.5087719 0.4827586 0.4545455 0.2153846 0.5303030

Conclusion: When testing the null hypothesis that the proportion of major complications is the same for each group we reject the null hypothesis (X-squared = 17.659, df = 5, p-value = 0.003406).

Test for each group

model.beta <- data_consolidated %>%
  # filter the data
  # filter only final
  filter(time == "final") %>%
  # count by complications
  select(group = grupas_krasa, 
         Major_complications = komplikacijas_nopietnas) %>% 
  mutate(Major_complications = fct_recode(Major_complications, 
                                           "Yes" = "Jā", 
                                           "No" =  "Nē")) %>% 

  # regression  model
  glm(Major_complications ~ group, 
      data = ., family = binomial) 
model.beta %>% 
  # regression table
  gtsummary::tbl_regression(exponentiate = TRUE)
Characteristic OR1 95% CI1 p-value
group
    Lillā
    Balta 1.07 0.52, 2.21 0.9
    Dzeltena 0.96 0.47, 1.98 >0.9
    Rozā 0.86 0.43, 1.73 0.7
    Zaļa 0.28 0.13, 0.61 0.001
    Zila 1.17 0.58, 2.35 0.7
1 OR = Odds Ratio, CI = Confidence Interval
model.beta %>% 
  sjPlot::plot_model()

report(model.beta)
## We fitted a logistic model (estimated using ML) to predict Major_complications
## with group (formula: Major_complications ~ group). The model's explanatory
## power is weak (Tjur's R2 = 0.05). The model's intercept, corresponding to group
## = Lillā, is at -0.03 (95% CI [-0.54, 0.47], p = 0.898). Within this model:
## 
##   - The effect of group [Balta] is statistically non-significant and positive
## (beta = 0.07, 95% CI [-0.66, 0.79], p = 0.854; Std. beta = 0.07, 95% CI [-0.66,
## 0.79])
##   - The effect of group [Dzeltena] is statistically non-significant and negative
## (beta = -0.04, 95% CI [-0.76, 0.68], p = 0.921; Std. beta = -0.04, 95% CI
## [-0.76, 0.68])
##   - The effect of group [Rozā] is statistically non-significant and negative
## (beta = -0.15, 95% CI [-0.85, 0.55], p = 0.674; Std. beta = -0.15, 95% CI
## [-0.85, 0.55])
##   - The effect of group [Zaļa] is statistically significant and negative (beta =
## -1.26, 95% CI [-2.06, -0.50], p = 0.001; Std. beta = -1.26, 95% CI [-2.06,
## -0.50])
##   - The effect of group [Zila] is statistically non-significant and positive
## (beta = 0.15, 95% CI [-0.54, 0.85], p = 0.665; Std. beta = 0.15, 95% CI [-0.54,
## 0.85])
## 
## Standardized parameters were obtained by fitting the model on a standardized
## version of the dataset. 95% Confidence Intervals (CIs) and p-values were
## computed using a Wald z-distribution approximation.

Check Cribari-Neto, F. and Zeileis, A. 2010. Beta Regression in R. Journal of Statistical Software 34(2). www.jstatsoft.org/article/view/v034i02/v34i02.pdf.

Graph

data_consolidated %>%
  # filter only final
  filter(time == "final") %>%
  # count by complications
  select(grupas_krasa, komplikacijas_nopietnas) %>%
  group_by(grupas_krasa, komplikacijas_nopietnas) %>%
  tabyl(grupas_krasa, komplikacijas_nopietnas) %>%
  mutate(MajorComplicationsPct = Jā / (Jā + Nē) * 100) %>%
  mutate(error_estimate = sqrt(MajorComplicationsPct)) %>%
  adorn_rounding()  %>%
  
  ggplot(aes(
    x = fct_reorder(grupas_krasa, MajorComplicationsPct) ,
    y = MajorComplicationsPct, 
    label = MajorComplicationsPct
  )) +
  geom_col() +
  # scale_fill_manual(values = c("green", "mistyrose", "magenta", "yellow", "blue", "lightgrey"))+
  coord_cartesian(ylim=c(0,100))+ 
  labs(title = "Major complications per group",
       y = "Percentage",
       x = "Group")  +
  geom_errorbar(
    aes(
      ymin = MajorComplicationsPct - error_estimate,
      ymax = MajorComplicationsPct + error_estimate
    ),
    width = .2,
    position = position_dodge(.9)
  ) +
  scale_y_continuous(labels = scales::percent_format(scale = 1)) 

Minor complications

Details

data_consolidated %>%
  # filter only final
  filter(time == "final") %>%
  # count by complications
  select(grupas_krasa, komplikacijas_minimalas) %>%
  group_by(grupas_krasa, komplikacijas_minimalas) %>%
  tabyl(grupas_krasa, komplikacijas_minimalas) %>%
  adorn_totals(where = "row") %>% 
  adorn_percentages("row") %>%
  adorn_pct_formatting(digits = 2) %>%
  adorn_ns() %>% 
  knitr::kable()
grupas_krasa
Lillā 90.16% (55) 9.84% (6)
Balta 89.47% (51) 10.53% (6)
Dzeltena 86.21% (50) 13.79% (8)
Rozā 83.33% (55) 16.67% (11)
Zaļa 69.23% (45) 30.77% (20)
Zila 80.30% (53) 19.70% (13)
Total 82.84% (309) 17.16% (64)

Percentage of complications

data_consolidated %>%
  # filter only final
  filter(time == "final") %>%
  # count by complications
  select(grupas_krasa, komplikacijas_minimalas) %>%
  group_by(grupas_krasa, komplikacijas_minimalas) %>%
  tabyl(grupas_krasa, komplikacijas_minimalas) %>% 
  mutate(MinorComplicationsPct = Jā / (Jā + Nē) * 100) %>% 
  mutate(error_estimate = sqrt(MinorComplicationsPct)) %>%
  adorn_rounding() %>% 
  knitr::kable()
grupas_krasa MinorComplicationsPct error_estimate
Lillā 55 6 90.2 9.5
Balta 51 6 89.5 9.5
Dzeltena 50 8 86.2 9.3
Rozā 55 11 83.3 9.1
Zaļa 45 20 69.2 8.3
Zila 53 13 80.3 9.0

Graph

data_consolidated %>%
  # filter only final
  filter(time == "final") %>%
  # count by complications
  select(grupas_krasa, komplikacijas_minimalas) %>%
  group_by(grupas_krasa, komplikacijas_minimalas) %>%
  tabyl(grupas_krasa, komplikacijas_minimalas) %>% 
  mutate(MinorComplicationsPct = Jā / (Jā + Nē) * 100) %>% 
  mutate(error_estimate = sqrt(MinorComplicationsPct)) %>%
  adorn_rounding() %>% 
  ggplot(aes(x = fct_reorder(grupas_krasa, MinorComplicationsPct) , 
             y = MinorComplicationsPct)) + 
  geom_col() + 
  coord_cartesian(ylim=c(0,100))+ 
  labs(title = "Minor complications per group", 
       y = "Percentage", 
       x = "Group") +
  geom_errorbar(
    aes(
      ymin = MinorComplicationsPct - error_estimate,
      ymax = MinorComplicationsPct + error_estimate
    ),
    width = .2,
    position = position_dodge(.9)
  ) +
  scale_y_continuous(labels = scales::percent_format(scale = 1))

Test differences of MINOR complications between groups

set the reference level for the response variable

data_consolidated <- data_consolidated %>% 
  mutate(komplikacijas_minimalas = fct_relevel(komplikacijas_minimalas, "Nē"))

H0: The proportion of cases is the same in each group: p1 = p2 = p3 = p4 = p5 = p6 Ha: The proportion of cases is not the same in each group: at least one pi is different from the others

Test using the prop.test

data_consolidated %>%
  # filter the data
  # filter only final
  filter(time == "final") %>%
  # count by complications
  select(group = grupas_krasa, 
         Minor_complications = komplikacijas_minimalas) %>% 
  mutate(Minor_complications = fct_recode(Minor_complications, 
                                           "Yes" = "Jā", 
                                           "No" =  "Nē")) %>% 
  # create the table for the prop test
  group_by(group, Minor_complications) %>%
  tabyl(group, Minor_complications) %>% 
  mutate(total = Yes + No)  %>% 
  # the prop test
  with(prop.test(Yes, total))
## 
##  6-sample test for equality of proportions without continuity
##  correction
## 
## data:  Yes out of total
## X-squared = 13.309, df = 5, p-value = 0.02065
## alternative hypothesis: two.sided
## sample estimates:
##    prop 1    prop 2    prop 3    prop 4    prop 5    prop 6 
## 0.9016393 0.8947368 0.8620690 0.8333333 0.6923077 0.8030303

Conclusion: When testing the null hypothesis that the proportion of minor complications is the same for each group we reject the null hypothesis X-squared = 13.309, df = 5, p-value = 0.02065 .

Test for each group ## Regression model for minor complications

model.beta <- data_consolidated %>%
  # filter the data
  # filter only final
  filter(time == "final") %>%
  # count by complications
  select(group = grupas_krasa, 
         Minor_complications = komplikacijas_minimalas) %>% 
  mutate(Minor_complications = fct_recode(Minor_complications, 
                                           "Yes" = "Jā", 
                                           "No" =  "Nē")) %>% 

  # regression  model
  glm(Minor_complications ~ group, 
      data = ., family = binomial) 
model.beta %>% 
  # regression table
  gtsummary::tbl_regression(exponentiate = TRUE)
Characteristic OR1 95% CI1 p-value
group
    Lillā
    Balta 0.93 0.27, 3.14 >0.9
    Dzeltena 0.68 0.21, 2.09 0.5
    Rozā 0.55 0.18, 1.54 0.3
    Zaļa 0.25 0.08, 0.63 0.006
    Zila 0.44 0.15, 1.21 0.13
1 OR = Odds Ratio, CI = Confidence Interval
model.beta %>% 
  sjPlot::plot_model()

report(model.beta)
## We fitted a logistic model (estimated using ML) to predict Minor_complications
## with group (formula: Minor_complications ~ group). The model's explanatory
## power is weak (Tjur's R2 = 0.04). The model's intercept, corresponding to group
## = Lillā, is at 2.22 (95% CI [1.45, 3.17], p < .001). Within this model:
## 
##   - The effect of group [Balta] is statistically non-significant and negative
## (beta = -0.08, 95% CI [-1.30, 1.14], p = 0.901; Std. beta = -0.08, 95% CI
## [-1.30, 1.14])
##   - The effect of group [Dzeltena] is statistically non-significant and negative
## (beta = -0.38, 95% CI [-1.55, 0.74], p = 0.505; Std. beta = -0.38, 95% CI
## [-1.55, 0.74])
##   - The effect of group [Rozā] is statistically non-significant and negative
## (beta = -0.61, 95% CI [-1.73, 0.43], p = 0.264; Std. beta = -0.61, 95% CI
## [-1.73, 0.43])
##   - The effect of group [Zaļa] is statistically significant and negative (beta =
## -1.40, 95% CI [-2.48, -0.46], p = 0.006; Std. beta = -1.40, 95% CI [-2.48,
## -0.46])
##   - The effect of group [Zila] is statistically non-significant and negative
## (beta = -0.81, 95% CI [-1.92, 0.19], p = 0.126; Std. beta = -0.81, 95% CI
## [-1.92, 0.19])
## 
## Standardized parameters were obtained by fitting the model on a standardized
## version of the dataset. 95% Confidence Intervals (CIs) and p-values were
## computed using a Wald z-distribution approximation.

MAJOR COMPLICATION ADJUSTED REGRESSION ANALYSIS

Create a variable called major complication yes

data_consolidated <- data_consolidated %>%
  mutate(major_complication = case_when(komplikacijas_nopietnas == "Jā" ~ "1",
                                        TRUE ~ "0")) %>%
  relocate(major_complication, .after = grupas_krasa)  %>% 
  mutate(major_complication = as.double(major_complication))

Complications number

table(data_consolidated$major_complication) %>% 
  knitr::kable()
Var1 Freq
0 634
1 166

Sex

table(data_consolidated$dzimums) %>% 
  knitr::kable()
Var1 Freq
Meitene 337
Zēns 463

ACTIVITY ANALYSIS BASELINE FINAL

first, select only children before and after

baseline_final_data <- data_consolidated %>% 
  # select(time, berna_kods) %>% 
  group_by(berna_kods) %>% 
  filter(n() > 1) 

Since I will not use anymore the data_consolidated, I will remove it

# rm(data_consolidated)

remove irrelevant variables

baseline_final_data <- baseline_final_data %>% 
  select(-c(timestamp, izmeklesanas_datums, mediciniska_anamneze:aplikums, 
            pudele_pec_gada:x2, 
            cooperation_formula:ga_after_follow_up) )

Now, I will create a dataset with only the activty data

First table aktivs vs neaktivs

 baseline_final_data <- baseline_final_data %>% 
  # select relevant variables
  select(time, grupas_krasa, berna_kods, kariesa_aktivitate_55:kariesa_aktivitate_85) %>% 
  # longer format for caries activity
  pivot_longer(kariesa_aktivitate_55:kariesa_aktivitate_85, 
               names_to = "karies_activitate", 
               values_to = "karies_act_values") %>% 
  mutate(karies_act_values = fct_relevel(karies_act_values, "Neaktīvs", "Aktīvs" )) %>%
  mutate(karies_act_values = fct_collapse(karies_act_values, "Nav attiecināms" = c("Nav attiecināms (0, P vai E)", "Nav attiecināms (0, P, E)" ))) %>% 
 
  # separate the caries activioty
    separate(karies_activitate, into = c("delete", "tooth"), sep="_(?=[^_]+$)") %>% 
  select(-delete)

Change to just three levels of karies_act_values

baseline_final_data <- baseline_final_data %>% 
  # pivot_longer(kariesa_aktivitate_55:kariesa_aktivitate_85, 
  #             names_to = "karies_activitate", 
  #             values_to = "karies_act_values") %>% 
  mutate(karies_act_values = fct_relevel(karies_act_values, "Neaktīvs", "Aktīvs" )) %>%
  mutate(karies_act_values = fct_collapse(karies_act_values, "Nav attiecināms" = c("Nav attiecināms (0, P vai E)", "Nav attiecināms (0, P, E)" ))) %>% 
  # relevel 
  mutate(status = fct_relevel(karies_act_values, "Aktīvs", "Neaktīvs"))

Check

table(baseline_final_data$time, baseline_final_data$status) %>% 
  knitr::kable()
Aktīvs Neaktīvs Nav attiecināms
baseline 2643 200 4597
final 1466 1171 4803

Data in longer almost ready for the activity analysis before after. Just pending to remove the “Nav attiecināms” at the baseline

baseline_final_data %>% 
  janitor::tabyl(status, time) %>% 
  knitr::kable()
status baseline final
Aktīvs 2643 1466
Neaktīvs 200 1171
Nav attiecināms 4597 4803

Try 1 to create a dataset for the table

Remove the Nav attiecināms from the baseline

baseline_final_data <- 
  baseline_final_data %>% 
  filter(!(time == "baseline" & status == "Nav attiecināms")) %>% 
  mutate(status = fct_drop(status))

Check

baseline_final_data %>% 
  janitor::tabyl(time, status) %>% 
  knitr::kable()
time Aktīvs Neaktīvs Nav attiecināms
baseline 2643 200 0
final 1466 1171 4803

Now, using brute force, will create to datasets, for baseline and final

change_baseline <- baseline_final_data %>%
  filter(time == "baseline") %>%
  mutate(status = fct_drop(status)) %>% 
  rename(activity_baseline = status) %>%
  select(-time)

Now the final

change_final <- baseline_final_data %>% 
  filter(time == "final") %>% 
  rename(activity_final = status) %>% 
  select(-c(time, grupas_krasa))

now join

change_table <- left_join(change_baseline, change_final, 
          by = c("berna_kods", "tooth"))
rm(baseline_final_data, change_baseline, change_final)
change_table %>% 
  janitor::tabyl(activity_baseline, activity_final) %>% 
  knitr::kable()
activity_baseline Aktīvs Neaktīvs Nav attiecināms
Aktīvs 1057 899 687
Neaktīvs 35 135 30
mosaicplot(table(change_table$activity_baseline, change_table$activity_final), shade = T)

chisq.test(table(change_table$activity_baseline, change_table$activity_final))
## 
##  Pearson's Chi-squared test
## 
## data:  table(change_table$activity_baseline, change_table$activity_final)
## X-squared = 90.721, df = 2, p-value < 2.2e-16
change_table %>%
  ggplot() +
  geom_mosaic(aes(x = product(activity_baseline),
                  fill = activity_final)) +
  labs(title = 'Change from baseline to final', 
       x = "Baseline status", 
       fill = "Activity final", 
       y = "")

change_table %>%
  ggplot() +
  geom_mosaic(aes(x = product(activity_baseline),
                  fill = activity_final)) +
  labs(title = 'Change from baseline to final', 
       x = "Baseline status", 
       fill = "Activity final", 
       y = "") + 
  facet_grid(grupas_krasa ~ . )

change_table %>%
  ggplot() +
  geom_mosaic(aes(x = product(activity_baseline),
                  fill = activity_final)) +
  labs(title = 'Change from baseline to final', 
       x = "Baseline status", 
       fill = "Activity final", 
       y = "") + 
  facet_grid(. ~ grupas_krasa )

change_table %>% 
  with(table(.$activity_baseline, .$activity_final, .$grupas_krasa)) 
## , ,  = Lillā
## 
##           
##            Aktīvs Neaktīvs Nav attiecināms
##   Aktīvs      169      102             141
##   Neaktīvs      3       33               0
## 
## , ,  = Balta
## 
##           
##            Aktīvs Neaktīvs Nav attiecināms
##   Aktīvs      252      143             122
##   Neaktīvs      4        2               2
## 
## , ,  = Dzeltena
## 
##           
##            Aktīvs Neaktīvs Nav attiecināms
##   Aktīvs      175       81             146
##   Neaktīvs     14       34              17
## 
## , ,  = Rozā
## 
##           
##            Aktīvs Neaktīvs Nav attiecināms
##   Aktīvs      191      240             131
##   Neaktīvs      0       15               3
## 
## , ,  = Zaļa
## 
##           
##            Aktīvs Neaktīvs Nav attiecināms
##   Aktīvs      109      176              64
##   Neaktīvs      1        6               0
## 
## , ,  = Zila
## 
##           
##            Aktīvs Neaktīvs Nav attiecināms
##   Aktīvs      161      157              83
##   Neaktīvs     13       45               8
mantelhaen.test(table(change_table$activity_baseline, change_table$activity_final, change_table$grupas_krasa))
## 
##  Cochran-Mantel-Haenszel test
## 
## data:  table(change_table$activity_baseline, change_table$activity_final, change_table$grupas_krasa)
## Cochran-Mantel-Haenszel M^2 = 111.72, df = 2, p-value < 2.2e-16

Analysis per tooth surface

Test differences of ACTIVITY PER TEETH between groups

change_table <- change_table %>% 
  select(grupas_krasa, berna_kods, 
         tooth, activity_baseline, activity_final)

Extract the information of the status at baseline and final times

change_table %>% 
  janitor::tabyl(activity_baseline, activity_final)  %>% 
  knitr::kable()
activity_baseline Aktīvs Neaktīvs Nav attiecināms
Aktīvs 1057 899 687
Neaktīvs 35 135 30
change_table <- change_table %>% 
  rename(group = grupas_krasa)

Create a new variable with the change outcome

change_table <- change_table %>% 
  mutate(change = case_when(
    # activity_baseline == "Aktīvs" & activity_final == "Aktīvs" ~ "No change", 
    activity_baseline == "Aktīvs" & activity_final == "Neaktīvs" ~ "Stopped", 
    # activity_baseline == "Neaktīvs" & activity_final == "Aktīvs" ~ "No_change",
    # activity_baseline == "Neaktīvs" & activity_final == "Neaktīvs" ~ "No change",
    TRUE ~ "No_change"
  ))

There are four possible outcomes

table(change_table$change) %>% 
  knitr::kable()
Var1 Freq
No_change 1944
Stopped 899

Test differences of CHANGE between groups

Test differences of MINOR complications between groups

set the reference level for the response variable. THIS IS IMPORTANT! Allow to explain the odds ratio of what is being prevented or increased. in this case, the first level is the risk of…

change_table$change = fct_relevel(change_table$change, "Stopped", "No_change")

Set the control group as Lillā

change_table$group =  fct_relevel(change_table$group, "Lillā")

Proportion of cases per group

change_table %>% 
  tabyl(group, change) %>% 
  adorn_percentages(denominator = "row") %>% 
  adorn_rounding(digits = 2) %>% 
  knitr::kable()
group Stopped No_change
Lillā 0.23 0.77
Balta 0.27 0.73
Dzeltena 0.17 0.83
Rozā 0.41 0.59
Zaļa 0.49 0.51
Zila 0.34 0.66

H0: The proportion of cases is the same in each group: p1 = p2 = p3 = p4 = p5 = p6 Ha: The proportion of cases is not the same in each group: at least one pi is different from the others

Test using the prop.test

change_table %>%
 
  tabyl(group, change) %>% 
  mutate(total = Stopped + No_change) %>% 
  # the prop test
  with(prop.test(Stopped, total))
## 
##  6-sample test for equality of proportions without continuity
##  correction
## 
## data:  Stopped out of total
## X-squared = 143.6, df = 5, p-value < 2.2e-16
## alternative hypothesis: two.sided
## sample estimates:
##    prop 1    prop 2    prop 3    prop 4    prop 5    prop 6 
## 0.2276786 0.2723810 0.1734475 0.4137931 0.4943820 0.3361884

Conclusion: When testing the null hypothesis that the proportion of stopped is the same for each group we reject the null hypothesis X-squared = 143.6, df = 5, p-value < 2.2e-16.

Regression for major complications

Test for each group

model.beta <- change_table %>%
  # regression  model
  glm(change ~ group, 
      data = ., family = binomial) 
model.beta %>% 
  # regression table  
  gtsummary::tbl_regression(exponentiate = TRUE)
Characteristic OR1 95% CI1 p-value
group
    Lillā
    Balta 0.79 0.59, 1.05 0.11
    Dzeltena 1.40 1.02, 1.95 0.041
    Rozā 0.42 0.32, 0.55 <0.001
    Zaļa 0.30 0.22, 0.41 <0.001
    Zila 0.58 0.43, 0.78 <0.001
1 OR = Odds Ratio, CI = Confidence Interval
model.beta %>% 
  sjPlot::plot_model() + 
  labs(title = "OR to Stop lesions")

# report(model.beta)

21 June 2023 Baseline healthy and evolution to final

Baseline kariess == 0 recode 1 to P = New lesion recode e-pufa = complication

compare change baseline to final only for kariess == 0

Convert the data from wide to long with selected variables

data_consolidated_long_kariess <- data_consolidated %>%
  select(time, grupas_krasa, berna_kods,
         kariess_55:kariess_85) %>%
  pivot_longer(kariess_55:kariess_85,
               names_to = "tooth",
               values_to = "status") %>%
  mutate(status = recode(status, "Pufa" = "PUFA")) %>%
  mutate(status = recode(status,
                         `0` = "Healthy",
                         `1` = "New lesion",
                         `2` = "New lesion",
                         `3` = "New lesion",
                         `4` = "New lesion",
                         `5` = "New lesion",
                         `6` = "New lesion",
                         `E` = "Complication",
                         `P` = "New lesion",
                         `PUFA` = "Complication")) %>%
  mutate(status = fct_relevel(status, "Healthy", "New Lesion")) 

Create a subset for the time == baseline and status == health

filtered_data_consolidated_long_kariess <- subset(data_consolidated_long_kariess, time == "baseline" & status == "Healthy")

now add the data from time = final

filtered_data_consolidated_long_kariess <- filtered_data_consolidated_long_kariess %>% 
  select(berna_kods, 
         tooth) %>% 
  left_join( data_consolidated_long_kariess, 
          by = c("berna_kods", "tooth")) %>% 
  select(time = time.y, 
         berna_kods, 
         tooth, 
         grupas_krasa, 
         status)
rm(data_consolidated_long_kariess)

Relevel the status for the tables

filtered_data_consolidated_long_kariess <- filtered_data_consolidated_long_kariess %>% 
  mutate(status = fct_relevel(status, "Healthy", "New lesion"))
filtered_data_consolidated_long_kariess %>% 
  tabyl(time, status)
##      time Healthy New lesion Complication
##  baseline    4761          0            0
##     final    3696        460           37

Convert names groups to eng

filtered_data_consolidated_long_kariess <- filtered_data_consolidated_long_kariess %>%
  mutate(groups = fct_recode(grupas_krasa,
    "SDF1" = "Rozā",
    "SDF2" = "Zaļa",
    "TF1" = "Balta",
    "TF2" = "Zila",
    "P1" = "Dzeltena",
    "P2" = "Lillā"
  ))

Now relevel the groups

filtered_data_consolidated_long_kariess <- filtered_data_consolidated_long_kariess %>%
  mutate(groups = fct_relevel(groups,

    "P2",                              
    "P1", 
 
    "TF1", 
    "TF2", 
    "SDF1", 
    "SDF2"
  ))

Figure

filtered_data_consolidated_long_kariess %>%
  # filter(time == "final") %>%
  count(time, status, groups) %>%
  as_tibble() %>%
  filter(time == "final") %>%
  ggplot(aes(x = time,
             y = n,
             fill = status)) +
  geom_col(position = "fill") +
  facet_grid(. ~ groups) +
  labs(title = "Evolution of Healthy Teeth at Baseline",
       fill = "Final Status",
       x = "",
       y = "") +
  theme(axis.text.x = element_blank()) +
  scale_y_continuous(labels = label_percent()) + 
  scale_fill_viridis_d(direction = -1, option = "plasma") 

The table: overall

filtered_data_consolidated_long_kariess %>% 
  tabyl(time, status) %>%
  rename("Time" = "time") %>% 
    adorn_totals() %>%
  kable(format = "html", escape = FALSE) %>% 
  kable_paper("hover", full_width = F)
Time Healthy New lesion Complication
baseline 4761 0 0
final 3696 460 37
Total 8457 460 37

The table

By status

filtered_data_consolidated_long_kariess %>% 
  filter(time == "final") %>% 
  tabyl(groups, status) %>%
  rename("Group" = "groups") %>% 
    adorn_totals() %>%
  kable(format = "html", escape = FALSE) %>% 
  kable_paper("hover", full_width = F)
Group Healthy New lesion Complication
P2 564 89 7
P1 535 84 10
TF1 489 61 3
TF2 706 73 13
SDF1 615 62 0
SDF2 787 91 4
Total 3696 460 37

By group

filtered_data_consolidated_long_kariess %>% 
  filter(time == "final") %>% 
  tabyl(status, groups) %>%
#   rename("Group" = "groups") %>% 
    adorn_totals() %>%
  kable(format = "html", escape = FALSE) %>% 
  kable_paper("hover", full_width = F)
status P2 P1 TF1 TF2 SDF1 SDF2
Healthy 564 535 489 706 615 787
New lesion 89 84 61 73 62 91
Complication 7 10 3 13 0 4
Total 660 629 553 792 677 882

By status

filtered_data_consolidated_long_kariess %>% 
  filter(time == "final") %>% 
  select(-time, 
         -berna_kods,
         -grupas_krasa, 
         -tooth) %>% 
  gtsummary::tbl_summary(by = status)
Characteristic Healthy, N = 3,6961 New lesion, N = 4601 Complication, N = 371
groups
    P2 564 (15%) 89 (19%) 7 (19%)
    P1 535 (14%) 84 (18%) 10 (27%)
    TF1 489 (13%) 61 (13%) 3 (8.1%)
    TF2 706 (19%) 73 (16%) 13 (35%)
    SDF1 615 (17%) 62 (13%) 0 (0%)
    SDF2 787 (21%) 91 (20%) 4 (11%)
1 n (%)

By groups

filtered_data_consolidated_long_kariess %>% 
  filter(time == "final") %>% 
  select(-time, 
         -berna_kods,
         -grupas_krasa, 
         -tooth) %>% 
  gtsummary::tbl_summary(by = groups) 
Characteristic P2, N = 6601 P1, N = 6291 TF1, N = 5531 TF2, N = 7921 SDF1, N = 6771 SDF2, N = 8821
status
    Healthy 564 (85%) 535 (85%) 489 (88%) 706 (89%) 615 (91%) 787 (89%)
    New lesion 89 (13%) 84 (13%) 61 (11%) 73 (9.2%) 62 (9.2%) 91 (10%)
    Complication 7 (1.1%) 10 (1.6%) 3 (0.5%) 13 (1.6%) 0 (0%) 4 (0.5%)
1 n (%)

By groups with CI95%

filtered_data_consolidated_long_kariess %>% 
  filter(time == "final") %>% 
  select(-time, 
         -berna_kods,
         -grupas_krasa, 
         -tooth) %>% 
  gtsummary::tbl_summary(by = groups) %>% 
  gtsummary::add_ci()
Characteristic P2, N = 6601 95% CI2 P1, N = 6291 95% CI2 TF1, N = 5531 95% CI2 TF2, N = 7921 95% CI2 SDF1, N = 6771 95% CI2 SDF2, N = 8821 95% CI2
status
    Healthy 564 (85%) 82%, 88% 535 (85%) 82%, 88% 489 (88%) 85%, 91% 706 (89%) 87%, 91% 615 (91%) 88%, 93% 787 (89%) 87%, 91%
    New lesion 89 (13%) 11%, 16% 84 (13%) 11%, 16% 61 (11%) 8.6%, 14% 73 (9.2%) 7.3%, 12% 62 (9.2%) 7.1%, 12% 91 (10%) 8.4%, 13%
    Complication 7 (1.1%) 0.47%, 2.3% 10 (1.6%) 0.81%, 3.0% 3 (0.5%) 0.14%, 1.7% 13 (1.6%) 0.92%, 2.9% 0 (0%) 0.00%, 0.70% 4 (0.5%) 0.15%, 1.2%
1 n (%)
2 CI = Confidence Interval

Check the differences

filtered_data_consolidated_long_kariess %>% 
  tabyl(status, groups) %>% 
  with(chisq.test(.))
## 
##  Pearson's Chi-squared test
## 
## data:  .
## X-squared = 25.485, df = 10, p-value = 0.004497
filtered_data_consolidated_long_kariess %>%
  count(status, groups) %>%
  pivot_wider(names_from = groups, values_from = n, values_fill = 0) %>%
  select(-status) %>%
  as.data.frame() %>%
  chisq.test()
## 
##  Pearson's Chi-squared test
## 
## data:  .
## X-squared = 25.485, df = 10, p-value = 0.004497
pacman::p_load(emmeans)
# Create a logistic regression model
model <- glm(status ~ groups, data = filtered_data_consolidated_long_kariess, family = binomial)

# Obtain estimated marginal means (proportions)
emm <- emmeans(model, ~ groups, type = "response")

# Perform pairwise comparisons
pairwise_comparisons <- pairs(emm, adjust = "none")

# Extract the results
pairwise_results <- summary(pairwise_comparisons)

# Print the pairwise comparison results
print(pairwise_results)
##  contrast    odds.ratio    SE  df null z.ratio p.value
##  P2 / P1          0.971 0.146 Inf    1  -0.199  0.8422
##  P2 / TF1         1.273 0.212 Inf    1   1.453  0.1462
##  P2 / TF2         1.261 0.193 Inf    1   1.513  0.1302
##  P2 / SDF1        1.504 0.252 Inf    1   2.439  0.0147
##  P2 / SDF2        1.291 0.193 Inf    1   1.713  0.0868
##  P1 / TF1         1.312 0.219 Inf    1   1.625  0.1041
##  P1 / TF2         1.299 0.200 Inf    1   1.699  0.0893
##  P1 / SDF1        1.550 0.261 Inf    1   2.606  0.0092
##  P1 / SDF2        1.330 0.200 Inf    1   1.902  0.0572
##  TF1 / TF2        0.990 0.168 Inf    1  -0.059  0.9533
##  TF1 / SDF1       1.182 0.216 Inf    1   0.913  0.3612
##  TF1 / SDF2       1.014 0.168 Inf    1   0.084  0.9333
##  TF2 / SDF1       1.193 0.204 Inf    1   1.035  0.3006
##  TF2 / SDF2       1.024 0.157 Inf    1   0.156  0.8761
##  SDF1 / SDF2      0.858 0.144 Inf    1  -0.914  0.3607
## 
## Tests are performed on the log odds ratio scale

Model change from baseline to final for the healthy at baseline

First, subset the data for the final, since when baseline the status is always 0

final_data <- filtered_data_consolidated_long_kariess %>% 
  filter(time == "final")
pacman::p_load(lme4)

recode the status to allow the regression

final_data <- final_data %>%
  mutate(status = case_when(
    status == "Healthy" ~ 0,
    status == "New lesion" ~ 1,
    TRUE ~ 2
  )) %>% 
  mutate(status = as.numeric(status)) %>% 
  select(-tooth, 
         -grupas_krasa, 
         -time)

Generalized Linear Mixed Models (GLMM):

pacman::p_load(lme4)

With clustering

final_data <- final_data %>% 
  mutate(status_factor = as.factor(status))
model <- glmer(status_factor  ~ groups + (1 | berna_kods),
               data = final_data,
               family = binomial(link = "logit"),
               control = lmerControl(check.nobs.vs.nRE = "ignore"))
gtsummary::tbl_regression(model, exponentiate = TRUE)
Characteristic OR1 95% CI1 p-value
groups
    P2
    P1 1.12 0.52, 2.38 0.8
    TF1 0.67 0.30, 1.49 0.3
    TF2 0.69 0.33, 1.45 0.3
    SDF1 0.53 0.25, 1.14 0.10
    SDF2 0.70 0.34, 1.46 0.3
1 OR = Odds Ratio, CI = Confidence Interval
model %>% 
  sjPlot::plot_model() + 
  labs(title = "OR for new lesions or complications from healthy baseline", 
       subtitle = "P2 as comparison", 
       caption = "Generalized Linear Mixed Models (GLMM), Clustering effect considered")

Without clustering

# model <- glmer(status_factor  ~ groups ,
#                data = final_data,
#                family = binomial(link = "logit"),
#                control = lmerControl(check.nobs.vs.nRE = "ignore"))
# gtsummary::tbl_regression(model, exponentiate = TRUE)
model %>% 
  sjPlot::plot_model() + 
  labs(title = "OR for new lesions or complications from healthy baseline", 
       subtitle = "P2 as comparison", 
       caption = "Generalized Linear Mixed Models (GLMM), Clustering effect *not* considered")

Mixed Effects Logistic Regression:

model <- glmer(status %in% c(1, 2) ~ groups + (1 | berna_kods),
               data = final_data,
               family = binomial(link = "logit"))
gtsummary::tbl_regression(model, exponentiate = TRUE)
Characteristic OR1 95% CI1 p-value
groups
    P2
    P1 1.11 0.52, 2.37 0.8
    TF1 0.67 0.30, 1.49 0.3
    TF2 0.69 0.33, 1.45 0.3
    SDF1 0.53 0.25, 1.14 0.10
    SDF2 0.70 0.34, 1.46 0.3
1 OR = Odds Ratio, CI = Confidence Interval
model %>% 
  sjPlot::plot_model() + 
  labs(title = "OR for new lesions or complications from healthy baseline", 
       subtitle = "P2 as comparison", 
       caption = "Mixed Effects Logistic Regression, Clustering effect  considered")