Packages

pacman::p_load(tidyverse,
               kableExtra, 
               here, 
               janitor,
               MatchIt,
               scales, 
   
                table1, # contains the label function
               gtsummary, 

               ggpubr, 
               RColorBrewer, 

                leaflet,

               viridis)

Dataset

theme_set(theme_minimal())
df <- janitor::clean_names(df)

Data cleaning

# visdat::vis_dat(df)
names(df)
 [1] "country"              "count"                "family_code"          "nr_of_family_members" "lives_in_specific"    "lives_in"            
 [7] "age"                  "toothpaste"           "toothpaste_full_name" "f_concentration"      "children_adult"      

Clean names and remove empty cols

Age groups

df <- df %>% 
  mutate(age_group = case_when(
    `age` < 6 ~ "0 ~ 5", 
    `age` < 13 ~ "6 ~ 12", 
    `age` < 19 ~ "13 ~ 18", 
    TRUE ~ "> 18"
  ))


#  relevel age groups
df$age_group <- fct_relevel(df$age_group, "0 ~ 5" , "6 ~ 12", "13 ~ 18", "> 18" )
janitor::tabyl(df$age_group) %>% janitor::adorn_pct_formatting() # # table age groups
 df$age_group    n percent
        0 ~ 5  363    6.7%
       6 ~ 12 1248   23.0%
      13 ~ 18  915   16.8%
         > 18 2910   53.5%

IN age group change NA to adult

df$`children_adult` <- df$`children_adult` %>% 
  replace_na("adult")

Grouping fluoride concentrations

Create a new variable F_concentration_group

df <- df %>% 
  mutate(`f_concentration_group` = case_when(
    `f_concentration` < 1 ~ "No fluoride",
    `f_concentration` < 1000 ~ "<1000 ppm",
    `f_concentration` < 1400 ~ "1000-1399 ppm", 
    `f_concentration` < 1501 ~ "1400-1500 ppm", 
    TRUE ~ "NA"
  )) %>% 
  mutate(`f_concentration_group` = fct_relevel(f_concentration_group, "No fluoride", "<1000 ppm","1000-1399 ppm", "1400-1500 ppm" ))
Warning: 1 unknown level in `f`: <1000 ppm

Separate datasets for each country

df_latvia <- df %>% 
  filter(country == "Latvia")
  
df_lithuania <- df %>% 
  filter(country == "Lithuania")
  

EDA

names(df)
 [1] "country"               "count"                 "family_code"           "nr_of_family_members"  "lives_in_specific"    
 [6] "lives_in"              "age"                   "toothpaste"            "toothpaste_full_name"  "f_concentration"      
[11] "children_adult"        "age_group"             "f_concentration_group"
summary(df)
   country              count   family_code        nr_of_family_members lives_in_specific    lives_in              age          
 Length:5436        Min.   :1   Length:5436        Min.   : 2.000       Length:5436        Length:5436        Min.   :    0.00  
 Class :character   1st Qu.:1   Class :character   1st Qu.: 4.000       Class :character   Class :character   1st Qu.:   11.00  
 Mode  :character   Median :1   Mode  :character   Median : 4.000       Mode  :character   Mode  :character   Median :   26.00  
                    Mean   :1                      Mean   : 4.707                                             Mean   :   35.27  
                    3rd Qu.:1                      3rd Qu.: 5.000                                             3rd Qu.:   41.00  
                    Max.   :1                      Max.   :15.000                                             Max.   :44774.00  
                                                                                                              NA's   :4         
  toothpaste        toothpaste_full_name f_concentration    children_adult       age_group      f_concentration_group
 Length:5436        Length:5436          Length:5436        Length:5436        0 ~ 5  : 363   No fluoride  : 775     
 Class :character   Class :character     Class :character   Class :character   6 ~ 12 :1248   1000-1399 ppm: 660     
 Mode  :character   Mode  :character     Mode  :character   Mode  :character   13 ~ 18: 915   1400-1500 ppm:3014     
                                                                               > 18   :2910   NA           : 987     
                                                                                                                     
                                                                                                                     
                                                                                                                     
summary(df_latvia)
   country              count   family_code        nr_of_family_members lives_in_specific    lives_in              age         toothpaste       
 Length:2294        Min.   :1   Length:2294        Min.   : 2.000       Length:2294        Length:2294        Min.   : 0.00   Length:2294       
 Class :character   1st Qu.:1   Class :character   1st Qu.: 4.000       Class :character   Class :character   1st Qu.:10.00   Class :character  
 Mode  :character   Median :1   Mode  :character   Median : 4.000       Mode  :character   Mode  :character   Median :21.00   Mode  :character  
                    Mean   :1                      Mean   : 4.706                                             Mean   :25.96                     
                    3rd Qu.:1                      3rd Qu.: 5.000                                             3rd Qu.:40.00                     
                    Max.   :1                      Max.   :12.000                                             Max.   :85.00                     
 toothpaste_full_name f_concentration    children_adult       age_group      f_concentration_group
 Length:2294          Length:2294        Length:2294        0 ~ 5  : 169   No fluoride  : 322     
 Class :character     Class :character   Class :character   6 ~ 12 : 642   1000-1399 ppm: 257     
 Mode  :character     Mode  :character   Mode  :character   13 ~ 18: 301   1400-1500 ppm:1356     
                                                            > 18   :1182   NA           : 359     
                                                                                                  
                                                                                                  
summary(df_lithuania)
   country              count   family_code        nr_of_family_members lives_in_specific    lives_in              age          
 Length:3142        Min.   :1   Length:3142        Min.   : 2.000       Length:3142        Length:3142        Min.   :    0.00  
 Class :character   1st Qu.:1   Class :character   1st Qu.: 4.000       Class :character   Class :character   1st Qu.:   12.00  
 Mode  :character   Median :1   Mode  :character   Median : 4.000       Mode  :character   Mode  :character   Median :   29.00  
                    Mean   :1                      Mean   : 4.708                                             Mean   :   42.08  
                    3rd Qu.:1                      3rd Qu.: 5.000                                             3rd Qu.:   42.00  
                    Max.   :1                      Max.   :15.000                                             Max.   :44774.00  
                                                                                                              NA's   :4         
  toothpaste        toothpaste_full_name f_concentration    children_adult       age_group      f_concentration_group
 Length:3142        Length:3142          Length:3142        Length:3142        0 ~ 5  : 194   No fluoride  : 453     
 Class :character   Class :character     Class :character   Class :character   6 ~ 12 : 606   1000-1399 ppm: 403     
 Mode  :character   Mode  :character     Mode  :character   Mode  :character   13 ~ 18: 614   1400-1500 ppm:1658     
                                                                               > 18   :1728   NA           : 628     
                                                                                                                     
                                                                                                                     
                                                                                                                     

Convert F to number

df$`f_concentration` <- as.integer(df$`f_concentration`)
Warning: NAs introduced by coercion
df %>% 
  ggplot(aes(x = `f_concentration`)) + 
  geom_histogram(bins = 5)

Convert F2 to number

# df$`f_concentration_2` <- as.integer(df$`f_concentration_for_the_second_paste`)
# df %>% 
#  ggplot(aes(x = `f_concentration_for_the_second_paste`)) + 
#  geom_histogram(bins = 5)

#Table 1

df$children_adult <-
  factor(df$children_adult,
         labels = c("Adult toothpaste",
                    "Child toothpaste"))

label(df$age) <- "Age"
label(df$age_group) <- "Age groups"
label(df$lives_in) <- "Lives In"
label(df$lives_in_specific) <- "Region"
label(df$f_concentration_group) <- "F concentration in toothpaste"
label(df$nr_of_family_members) <- "Number of family members"
df %>% 
  select(age, age_group, lives_in, country) %>% 
  gtsummary::tbl_summary(by = country, missing = "no", 
                         statistic = list(all_continuous() ~ "{mean} ({sd})")) %>% 
  gtsummary::add_overall()
Characteristic Overall, N = 5,4361 Latvia, N = 2,2941 Lithuania, N = 3,1421
Age 35 (607) 26 (17) 42 (799)
Age groups
    0 ~ 5 363 (6.7%) 169 (7.4%) 194 (6.2%)
    6 ~ 12 1,248 (23%) 642 (28%) 606 (19%)
    13 ~ 18 915 (17%) 301 (13%) 614 (20%)
    > 18 2,910 (54%) 1,182 (52%) 1,728 (55%)
Lives In
    City 2,556 (47%) 1,144 (50%) 1,412 (45%)
    Rural area 1,040 (19%) 422 (18%) 618 (20%)
    Town 1,840 (34%) 728 (32%) 1,112 (35%)
1 Mean (SD); n (%)

Table of main results

df %>% 
  select(f_concentration_group, children_adult, country) %>% 
  gtsummary::tbl_summary(by = country, missing = "no", 
                         statistic = list(all_continuous() ~ "{mean} ({sd})")) %>% 
  gtsummary::add_overall()
Characteristic Overall, N = 5,4361 Latvia, N = 2,2941 Lithuania, N = 3,1421
F concentration in toothpaste
    No fluoride 775 (14%) 322 (14%) 453 (14%)
    1000-1399 ppm 660 (12%) 257 (11%) 403 (13%)
    1400-1500 ppm 3,014 (55%) 1,356 (59%) 1,658 (53%)
    NA 987 (18%) 359 (16%) 628 (20%)
children_adult
    Adult toothpaste 4,753 (87%) 1,843 (80%) 2,910 (93%)
    Child toothpaste 683 (13%) 451 (20%) 232 (7.4%)
1 n (%)
df %>% 
  select(lives_in, f_concentration_group) %>% 
  gtsummary::tbl_summary(by = lives_in, missing = "no") %>% 
  gtsummary::add_overall()
Characteristic Overall, N = 5,4361 City, N = 2,5561 Rural area, N = 1,0401 Town, N = 1,8401
F concentration in toothpaste
    No fluoride 775 (14%) 395 (15%) 136 (13%) 244 (13%)
    1000-1399 ppm 660 (12%) 331 (13%) 113 (11%) 216 (12%)
    1400-1500 ppm 3,014 (55%) 1,334 (52%) 600 (58%) 1,080 (59%)
    NA 987 (18%) 496 (19%) 191 (18%) 300 (16%)
1 n (%)

Table of main results for Latvia

df %>% 
  filter(country == "Latvia") %>% 
  select(f_concentration_group, age_group) %>% 
  gtsummary::tbl_summary(by = age_group, missing = "no") %>% 
  gtsummary::add_overall()
Characteristic Overall, N = 2,2941 0 ~ 5, N = 1691 6 ~ 12, N = 6421 13 ~ 18, N = 3011 > 18, N = 1,1821
F concentration in toothpaste
    No fluoride 322 (14%) 25 (15%) 69 (11%) 38 (13%) 190 (16%)
    1000-1399 ppm 257 (11%) 26 (15%) 63 (9.8%) 27 (9.0%) 141 (12%)
    1400-1500 ppm 1,356 (59%) 43 (25%) 409 (64%) 201 (67%) 703 (59%)
    NA 359 (16%) 75 (44%) 101 (16%) 35 (12%) 148 (13%)
1 n (%)
df %>% 
  filter(country == "Latvia") %>% 
  select(lives_in, f_concentration_group) %>% 
  gtsummary::tbl_summary(by = lives_in, missing = "no") %>% 
  gtsummary::add_overall()
Characteristic Overall, N = 2,2941 City, N = 1,1441 Rural area, N = 4221 Town, N = 7281
F concentration in toothpaste
    No fluoride 322 (14%) 203 (18%) 50 (12%) 69 (9.5%)
    1000-1399 ppm 257 (11%) 132 (12%) 33 (7.8%) 92 (13%)
    1400-1500 ppm 1,356 (59%) 602 (53%) 270 (64%) 484 (66%)
    NA 359 (16%) 207 (18%) 69 (16%) 83 (11%)
1 n (%)

Table of main results for Lithuania

df %>% 
  filter(country == "Lithuania") %>% 
  select(f_concentration_group, age_group) %>% 
  gtsummary::tbl_summary(by = age_group, missing = "no") %>% 
  gtsummary::add_overall()
Characteristic Overall, N = 3,1421 0 ~ 5, N = 1941 6 ~ 12, N = 6061 13 ~ 18, N = 6141 > 18, N = 1,7281
F concentration in toothpaste
    No fluoride 453 (14%) 24 (12%) 80 (13%) 107 (17%) 242 (14%)
    1000-1399 ppm 403 (13%) 31 (16%) 85 (14%) 61 (9.9%) 226 (13%)
    1400-1500 ppm 1,658 (53%) 34 (18%) 287 (47%) 361 (59%) 976 (56%)
    NA 628 (20%) 105 (54%) 154 (25%) 85 (14%) 284 (16%)
1 n (%)
df %>% 
  filter(country == "Lithuania") %>% 
  select(lives_in, f_concentration_group) %>% 
  gtsummary::tbl_summary(by = lives_in, missing = "no") %>% 
  gtsummary::add_overall()
Characteristic Overall, N = 3,1421 City, N = 1,4121 Rural area, N = 6181 Town, N = 1,1121
F concentration in toothpaste
    No fluoride 453 (14%) 192 (14%) 86 (14%) 175 (16%)
    1000-1399 ppm 403 (13%) 199 (14%) 80 (13%) 124 (11%)
    1400-1500 ppm 1,658 (53%) 732 (52%) 330 (53%) 596 (54%)
    NA 628 (20%) 289 (20%) 122 (20%) 217 (20%)
1 n (%)

Graph F concentration by age groups

Sergio, por favor, agrega valores en grafico

my_palette <- brewer.pal(name="Spectral", n = 7)[0:9]
df %>% 
  select(age_group, country, f_concentration_group) %>% 
  filter(f_concentration_group != "NA") %>% 
  
  # calculate the percentages
  group_by(country, f_concentration_group, age_group) %>% 
  count() %>% 
  # pivot_wider(names_from = f_concentration_group,
  #            values_from = n)

  group_by(country) %>% 
  mutate(perc = n / sum(n) * 100) %>% 
  mutate(perc = round(perc, 1)) %>% 
  # select(-n) %>% 
  # pivot_wider(names_from = f_concentration_group,
   #           values_from = perc)
  
 
   # make the plot
   ggplot(aes(x = age_group, 
             y = perc, 
             label = perc, 
             fill = f_concentration_group)) +
  geom_col(position="fill") +
  
  scale_fill_brewer(palette = "Spectral") +
  labs(title = "Use of F Toothpaste by Age", 
       fill = "Concentration",
       x = "Age group", 
       y = "", 
       caption = "Numbers on bars corresponds to the number of persons. Latvia N = 2 270, Lithuania = 2 815."
       ) + 
  #geom_text(
   # aes(label = n),
    #size = 2.5, 
    #position = position_fill(0.5)
  # ) +
  scale_y_continuous(labels = label_percent()) +
  # divide by country
  facet_grid( country ~ .) 

ggsave(
  here("figures", "figure_age.pdf"),
  width = 20, height = 20, units = "cm", 
  # res = 300, 
  device='pdf'
)

Differences per age group

table(df$age_group, df$f_concentration_group)
         
          No fluoride 1000-1399 ppm 1400-1500 ppm   NA
  0 ~ 5            49            57            77  180
  6 ~ 12          149           148           696  255
  13 ~ 18         145            88           562  120
  > 18            432           367          1679  432
chisq.test(table(df$age_group, df$f_concentration_group))

    Pearson's Chi-squared test

data:  table(df$age_group, df$f_concentration_group)
X-squared = 333.13, df = 9, p-value < 2.2e-16
mosaicplot(table(df$f_concentration_group, df$age_group), shade = T)

Latvia per age


table(df_latvia$age_group, df_latvia$f_concentration_group)
         
          No fluoride 1000-1399 ppm 1400-1500 ppm  NA
  0 ~ 5            25            26            43  75
  6 ~ 12           69            63           409 101
  13 ~ 18          38            27           201  35
  > 18            190           141           703 148
chisq.test(table(df_latvia$age_group, df_latvia$f_concentration_group))

    Pearson's Chi-squared test

data:  table(df_latvia$age_group, df_latvia$f_concentration_group)
X-squared = 151.94, df = 9, p-value < 2.2e-16
mosaicplot(table(df_latvia$f_concentration_group, df_latvia$age_group), shade = T)

Lithuania per age

  table(df_lithuania$age_group, df_lithuania$f_concentration_group)
         
          No fluoride 1000-1399 ppm 1400-1500 ppm  NA
  0 ~ 5            24            31            34 105
  6 ~ 12           80            85           287 154
  13 ~ 18         107            61           361  85
  > 18            242           226           976 284
chisq.test(table(df_lithuania$age_group, df_lithuania$f_concentration_group))

    Pearson's Chi-squared test

data:  table(df_lithuania$age_group, df_lithuania$f_concentration_group)
X-squared = 213.81, df = 9, p-value < 2.2e-16
mosaicplot(table(df_lithuania$f_concentration_group, df_lithuania$age_group), shade = T)

Graph F concentration by living area

Sergio, por favor, agrega valores en grafico

df %>% 
  select(lives_in, country, f_concentration_group) %>% 
  filter(f_concentration_group != "NA")%>% 
  
    # calculate the percentages
  group_by(country, f_concentration_group, lives_in) %>% 
  count() %>% 
 

  group_by(country) %>% 
  mutate(perc = n / sum(n) * 100) %>% 
  mutate(perc = round(perc, 1)) %>% 
  
  # make the plot

   # make the plot
   ggplot(aes(x = lives_in, 
             y = perc, 
             label = perc, 
             fill = f_concentration_group)) +
  geom_col(position="fill") +
  
  scale_fill_brewer(palette = "Spectral") +
  labs(title = "Use of F Toothpaste by Area", 
       fill = "Concentration",
       x = "Area", 
       y = "", 
       caption = " Numbers on bars corresponds to the number of persons. Latvia N = 2 270, Lithuania = 2 815."
       ) + 
#  geom_text(
#    aes(label = perc),
#    size = 2.5, 
#    position = position_fill(0.5)
#  ) +
  scale_y_continuous(labels = label_percent()) +
  # divide by country
  facet_grid( country ~ .) 

ggsave(
  here("figures", "figure_area.pdf"),
  width = 20, height = 20, units = "cm", 
  # res = 300, 
  device='pdf'
)

Differences per living area


table(df$lives_in, df$f_concentration_group)
            
             No fluoride 1000-1399 ppm 1400-1500 ppm   NA
  City               395           331          1334  496
  Rural area         136           113           600  191
  Town               244           216          1080  300
chisq.test(table(df$lives_in, df$f_concentration_group))

    Pearson's Chi-squared test

data:  table(df$lives_in, df$f_concentration_group)
X-squared = 22.913, df = 6, p-value = 0.0008261
mosaicplot(table(df$f_concentration_group, df$lives_in), shade = T)

Latvia per living area


table(df_latvia$lives_in, df_latvia$f_concentration_group)
            
             No fluoride 1000-1399 ppm 1400-1500 ppm  NA
  City               203           132           602 207
  Rural area          50            33           270  69
  Town                69            92           484  83
chisq.test(table(df_latvia$lives_in, df_latvia$f_concentration_group))

    Pearson's Chi-squared test

data:  table(df_latvia$lives_in, df_latvia$f_concentration_group)
X-squared = 58.621, df = 6, p-value = 8.574e-11
mosaicplot(table(df_latvia$f_concentration_group, df_latvia$lives_in), shade = T)

Lithuania per living area


table(df_lithuania$lives_in, df_lithuania$f_concentration_group)
            
             No fluoride 1000-1399 ppm 1400-1500 ppm  NA
  City               192           199           732 289
  Rural area          86            80           330 122
  Town               175           124           596 217
chisq.test(table(df_lithuania$lives_in, df_lithuania$f_concentration_group))

    Pearson's Chi-squared test

data:  table(df_lithuania$lives_in, df_lithuania$f_concentration_group)
X-squared = 7.0444, df = 6, p-value = 0.3168
mosaicplot(table(df_lithuania$f_concentration_group, df_lithuania$lives_in), shade = T)

F concentration analysis

janitor::tabyl(df$f_concentration) %>% 
  adorn_pct_formatting()
 df$f_concentration    n percent valid_percent
                  0  775   14.3%         15.2%
                110    2    0.0%          0.0%
                125    8    0.1%          0.2%
                198    1    0.0%          0.0%
                250    6    0.1%          0.1%
                450    9    0.2%          0.2%
                498   26    0.5%          0.5%
                500  185    3.4%          3.6%
                504   13    0.2%          0.3%
                550    6    0.1%          0.1%
                594   13    0.2%          0.3%
                600    8    0.1%          0.2%
                650    1    0.0%          0.0%
                700    6    0.1%          0.1%
                792    1    0.0%          0.0%
                796    5    0.1%          0.1%
                890    5    0.1%          0.1%
                900   18    0.3%          0.4%
                905    4    0.1%          0.1%
                920    2    0.0%          0.0%
                950  327    6.0%          6.4%
               1000  292    5.4%          5.7%
               1040    9    0.2%          0.2%
               1050    2    0.0%          0.0%
               1100   29    0.5%          0.6%
               1131  112    2.1%          2.2%
               1136   44    0.8%          0.9%
               1176    6    0.1%          0.1%
               1200    3    0.1%          0.1%
               1225    1    0.0%          0.0%
               1250   29    0.5%          0.6%
               1350   17    0.3%          0.3%
               1357    3    0.1%          0.1%
               1360  103    1.9%          2.0%
               1400  457    8.4%          9.0%
               1426    4    0.1%          0.1%
               1440    1    0.0%          0.0%
               1447    2    0.0%          0.0%
               1448  118    2.2%          2.3%
               1450 2215   40.7%         43.6%
               1454    7    0.1%          0.1%
               1455    2    0.0%          0.0%
               1476   28    0.5%          0.6%
               1482   53    1.0%          1.0%
               1485    1    0.0%          0.0%
               1490   10    0.2%          0.2%
               1500  116    2.1%          2.3%
                 NA  351    6.5%             -
df %>% 
  ggplot(aes(x = f_concentration)) + 
  geom_histogram(bins = 10)

Table f concentration by age with means, etc

df %>% 
  group_by(`f_concentration_group`) %>% 
  summarise(n = n(), 
            mean = mean(`age`), 
            sd = sd(`age`), 
            min = min(`age`), 
            max = max(`age`)) %>% 
  mutate_if(is.numeric, round, 1)
df$f_concentration_test <- factor(df$f_concentration, 
                             levels = c(
                              "NA",
                              "No fluoride", 
                              "<1000 ppm", 
                              "551 to 999 ppm", 
                              "1000 and 1500 ppm"))
df %>% 
  janitor::tabyl(age_group, `f_concentration`) %>% 
  knitr::kable()
age_group 0 1000 1040 1050 110 1100 1131 1136 1176 1200 1225 125 1250 1350 1357 1360 1400 1426 1440 1447 1448 1450 1454 1455 1476 1482 1485 1490 1500 198 250 450 498 500 504 550 594 600 650 700 792 796 890 900 905 920 950 NA_
0 ~ 5 49 46 0 0 2 4 2 0 0 0 1 0 1 1 0 0 14 0 0 0 2 57 0 0 0 1 0 0 3 0 2 1 16 93 2 4 0 0 0 0 0 0 0 1 0 0 2 59
6 ~ 12 149 100 3 0 0 7 12 9 1 0 0 0 3 4 0 9 201 0 1 1 29 415 2 0 4 9 0 0 34 1 4 4 10 68 10 2 0 1 1 2 0 0 1 10 0 1 62 78
13 ~ 18 145 31 2 0 0 1 21 6 2 0 0 2 7 1 0 15 46 0 0 1 19 454 2 0 8 10 1 0 21 0 0 2 0 4 1 0 6 3 0 2 0 2 0 4 0 0 55 41
> 18 432 115 4 2 0 17 77 29 3 3 0 6 18 11 3 79 196 4 0 0 68 1289 3 2 16 33 0 10 58 0 0 2 0 20 0 0 7 4 0 2 1 3 4 3 4 1 208 173
df %>% 
  janitor::tabyl(`lives_in`, `f_concentration`) %>% 
  janitor::adorn_totals(c("row", "col")) %>%
  janitor::adorn_percentages("row") %>% 
  janitor::adorn_pct_formatting(rounding = "half up", digits = 0) %>%
  janitor::adorn_ns() %>%
  janitor::adorn_title("combined") %>%
  knitr::kable()
lives_in/f_concentration 0 1000 1040 1050 110 1100 1131 1136 1176 1200 1225 125 1250 1350 1357 1360 1400 1426 1440 1447 1448 1450 1454 1455 1476 1482 1485 1490 1500 198 250 450 498 500 504 550 594 600 650 700 792 796 890 900 905 920 950 NA_ Total
City 15% (395) 6% (145) 0% (4) 0% (2) 0% (2) 1% (13) 2% (49) 1% (20) 0% (6) 0% (2) 0% (1) 0% (7) 1% (20) 0% (10) 0% (3) 2% (47) 10% (252) 0% (2) 0% (1) 0% (2) 3% (68) 36% (927) 0% (7) 0% (0) 1% (20) 0% (6) 0% (1) 0% (3) 2% (45) 0% (1) 0% (5) 0% (1) 1% (16) 4% (97) 0% (1) 0% (2) 0% (8) 0% (5) 0% (0) 0% (6) 0% (1) 0% (5) 0% (5) 1% (16) 0% (3) 0% (2) 7% (190) 5% (132) 100% (2556)
Rural area 13% (136) 4% (41) 0% (3) 0% (0) 0% (0) 1% (10) 3% (30) 1% (8) 0% (0) 0% (1) 0% (0) 0% (1) 0% (3) 0% (1) 0% (0) 1% (15) 6% (63) 0% (0) 0% (0) 0% (0) 2% (17) 44% (460) 0% (0) 0% (0) 1% (8) 1% (15) 0% (0) 0% (0) 4% (37) 0% (0) 0% (1) 1% (8) 0% (5) 3% (30) 1% (8) 0% (0) 0% (3) 0% (0) 0% (0) 0% (0) 0% (0) 0% (0) 0% (0) 0% (1) 0% (1) 0% (0) 5% (50) 8% (84) 100% (1040)
Town 13% (244) 6% (106) 0% (2) 0% (0) 0% (0) 0% (6) 2% (33) 1% (16) 0% (0) 0% (0) 0% (0) 0% (0) 0% (6) 0% (6) 0% (0) 2% (41) 8% (142) 0% (2) 0% (0) 0% (0) 2% (33) 45% (828) 0% (0) 0% (2) 0% (0) 2% (32) 0% (0) 0% (7) 2% (34) 0% (0) 0% (0) 0% (0) 0% (5) 3% (58) 0% (4) 0% (4) 0% (2) 0% (3) 0% (1) 0% (0) 0% (0) 0% (0) 0% (0) 0% (1) 0% (0) 0% (0) 5% (87) 7% (135) 100% (1840)
Total 14% (775) 5% (292) 0% (9) 0% (2) 0% (2) 1% (29) 2% (112) 1% (44) 0% (6) 0% (3) 0% (1) 0% (8) 1% (29) 0% (17) 0% (3) 2% (103) 8% (457) 0% (4) 0% (1) 0% (2) 2% (118) 41% (2215) 0% (7) 0% (2) 1% (28) 1% (53) 0% (1) 0% (10) 2% (116) 0% (1) 0% (6) 0% (9) 0% (26) 3% (185) 0% (13) 0% (6) 0% (13) 0% (8) 0% (1) 0% (6) 0% (1) 0% (5) 0% (5) 0% (18) 0% (4) 0% (2) 6% (327) 6% (351) 100% (5436)
df %>% 
  janitor::tabyl(age_group, `f_concentration`) %>% 
  janitor::adorn_totals(c("row", "col")) %>%
  janitor::adorn_percentages("row") %>% 
  janitor::adorn_pct_formatting(rounding = "half up", digits = 0) %>%
  janitor::adorn_ns() %>%
  janitor::adorn_title("combined") %>%
  knitr::kable() %>% kableExtra::kable_styling(bootstrap_options = c("striped", "hover"))
age_group/f_concentration 0 1000 1040 1050 110 1100 1131 1136 1176 1200 1225 125 1250 1350 1357 1360 1400 1426 1440 1447 1448 1450 1454 1455 1476 1482 1485 1490 1500 198 250 450 498 500 504 550 594 600 650 700 792 796 890 900 905 920 950 NA_ Total
0 ~ 5 13% (49) 13% (46) 0% (0) 0% (0) 1% (2) 1% (4) 1% (2) 0% (0) 0% (0) 0% (0) 0% (1) 0% (0) 0% (1) 0% (1) 0% (0) 0% (0) 4% (14) 0% (0) 0% (0) 0% (0) 1% (2) 16% (57) 0% (0) 0% (0) 0% (0) 0% (1) 0% (0) 0% (0) 1% (3) 0% (0) 1% (2) 0% (1) 4% (16) 26% (93) 1% (2) 1% (4) 0% (0) 0% (0) 0% (0) 0% (0) 0% (0) 0% (0) 0% (0) 0% (1) 0% (0) 0% (0) 1% (2) 16% (59) 100% (363)
6 ~ 12 12% (149) 8% (100) 0% (3) 0% (0) 0% (0) 1% (7) 1% (12) 1% (9) 0% (1) 0% (0) 0% (0) 0% (0) 0% (3) 0% (4) 0% (0) 1% (9) 16% (201) 0% (0) 0% (1) 0% (1) 2% (29) 33% (415) 0% (2) 0% (0) 0% (4) 1% (9) 0% (0) 0% (0) 3% (34) 0% (1) 0% (4) 0% (4) 1% (10) 5% (68) 1% (10) 0% (2) 0% (0) 0% (1) 0% (1) 0% (2) 0% (0) 0% (0) 0% (1) 1% (10) 0% (0) 0% (1) 5% (62) 6% (78) 100% (1248)
13 ~ 18 16% (145) 3% (31) 0% (2) 0% (0) 0% (0) 0% (1) 2% (21) 1% (6) 0% (2) 0% (0) 0% (0) 0% (2) 1% (7) 0% (1) 0% (0) 2% (15) 5% (46) 0% (0) 0% (0) 0% (1) 2% (19) 50% (454) 0% (2) 0% (0) 1% (8) 1% (10) 0% (1) 0% (0) 2% (21) 0% (0) 0% (0) 0% (2) 0% (0) 0% (4) 0% (1) 0% (0) 1% (6) 0% (3) 0% (0) 0% (2) 0% (0) 0% (2) 0% (0) 0% (4) 0% (0) 0% (0) 6% (55) 4% (41) 100% (915)
> 18 15% (432) 4% (115) 0% (4) 0% (2) 0% (0) 1% (17) 3% (77) 1% (29) 0% (3) 0% (3) 0% (0) 0% (6) 1% (18) 0% (11) 0% (3) 3% (79) 7% (196) 0% (4) 0% (0) 0% (0) 2% (68) 44% (1289) 0% (3) 0% (2) 1% (16) 1% (33) 0% (0) 0% (10) 2% (58) 0% (0) 0% (0) 0% (2) 0% (0) 1% (20) 0% (0) 0% (0) 0% (7) 0% (4) 0% (0) 0% (2) 0% (1) 0% (3) 0% (4) 0% (3) 0% (4) 0% (1) 7% (208) 6% (173) 100% (2910)
Total 14% (775) 5% (292) 0% (9) 0% (2) 0% (2) 1% (29) 2% (112) 1% (44) 0% (6) 0% (3) 0% (1) 0% (8) 1% (29) 0% (17) 0% (3) 2% (103) 8% (457) 0% (4) 0% (1) 0% (2) 2% (118) 41% (2215) 0% (7) 0% (2) 1% (28) 1% (53) 0% (1) 0% (10) 2% (116) 0% (1) 0% (6) 0% (9) 0% (26) 3% (185) 0% (13) 0% (6) 0% (13) 0% (8) 0% (1) 0% (6) 0% (1) 0% (5) 0% (5) 0% (18) 0% (4) 0% (2) 6% (327) 6% (351) 100% (5436)
df %>%  
  filter(`f_concentration` == "0 - No fluoride") %>% 
  na.omit() %>%    # Using "data", filter out all rows with NAs in aa 
  group_by(`toothpaste`) %>%          # Then, with the filtered data, group it by "bb"
  summarise(Unique_Elements = n_distinct(`toothpaste`))  %>%   # Now summarise with unique elements per group
  knitr::kable()
toothpaste Unique_Elements

Number of different toothpastes

df %>% 
  mutate(toothpaste2 = fct_lump_prop(toothpaste, prop = .02)) %>% 
  group_by(toothpaste2, country) %>% 
  count() %>% 
  pivot_wider(names_from = country, 
              values_from = n) %>% 
  arrange(desc(Lithuania)) %>% 
  arrange(desc(Latvia))
df %>% 
  group_by(toothpaste, country) %>% 
  summarise(count = n())
`summarise()` has grouped output by 'toothpaste'. You can override using the `.groups` argument.
df %>%  
  filter(!`f_concentration` == "0 - No fluoride") %>% 
  #na.omit() %>%    # Using "data", filter out all rows with NAs in aa 
  group_by(`toothpaste`) %>%          # Then, with the filtered data, group it by "bb"
  summarise(Unique_Elements = n_distinct(`toothpaste`))  %>%   # Now summarise with unique elements per group
  knitr::kable()
toothpaste Unique_Elements
15 sekundi 1
Active KIDS Minty Fresh BEAUTY FORMULAS 1
Active oral care 1
Ajona 1
Alkmene 1
Alterra kids 0-6 1
Apacare 1
Apatite 1
Aquafresh 1
Arm and Hammer 1
Aronal 1
Atemfrisch 1
Beverly Hills Formula 1
Bilka 1
Bio2You 1
Biomed 1
BioMin 1
Biorepair 1
BLACK “Ben and Anna” 1
Blanx 1
Blend-a-med 1
Buccotherm 1
Casino Soin Blancheur 1
Charcoal 1
Chicco dentifricio 1
Colgate 1
Colodent 1
Coslys 1
Crest 1
Curaprox 1
Curasept 1
Dabur RED 1
Deliplus 1
Denivit 1
DentaDoc 1
Dentagard 1
Dental tripple effect 1
Dentalux 1
Dentavit 1
Dentesan 1
Dentica 1
Dentodent 1
Dentofit 1
Doctordent 1
Donto dent Brilliant Weiss 1
Dr.organic extra whitening toothpaste 1
Ecodenta 1
Elcemed 1
Elgydium 1
Elkos 1
Elmex 1
Emoform 1
Eurodont 1
Faberlic 1
Firefly 1
Fluocaril 1
Fluor-Fresh 1
Foramen 1
Forever Bright 1
FrezyDERM Sensiteeth First 1
Georganics 1
Glister 1
Green tea tooth paste 1
Greenfeel sensitive 1
Gum 1
Himalaya 1
Humble 1
ISME 1
iWhite 1
Jordan 1
Kamro Oriental care 1
Kedrov 1
Kedrovij balzam 1
Kin 1
Kingfisher fresh 1
Kokosmile 1
Krievu Bio 1
Kruidvat 1
L’Angelica 1
Lacalut 1
LaLigne Dent 1
Langelica 1
Lebon 1
Lesnoj Balsam 1
Lesnoj balzam 1
Lion 1
Livsane 1
Luneje Yunnan Baiyao Tooth 1
M savers 1
Malaleuca 1
Marvis 1
MArvita MedZahncreme Fresh Gel 1
Mega Mint 1
Mentadent 1
Meridol 1
Mirafluor 1
Morrisons Savers 1
Natura Siberica 1
Natural Oaceanic Peal 1
Nida 1
Novij zhemchug 1
Oak Bark Modum Clasic (rusiska) 1
Odol-med3 1
Opalescence 1
Optifresh 1
Oral-B 1
Oreon 1
Organic people Zoom and white 1
Parodontax 1
Parodontol 1
Pasta Del Capitano 1
Pepsodent 1
Pierrot 1
Piksters Plaque GLO 1
Pomorin 1
President 1
Prima Fluor 1
Procare 1
Prokudent 1
R.O.C.S. 1
Rapid White 1
Recepti zdorovja 1
Red Seal 1
Redi-Dental 1
Rose Rio kids 1
Royal Dent 1
Saliagalen 1
SensiDent 1
Sensitive Daily Protection 1
Sensodyne 1
Signal 1
SilcaMed 1
SilcaPutzi 1
Solidax 1
Splat 1
Superdrug 1
Superwhite 1
Svoboda 1
Tebodont 1
The Humble Co 1
Theramed 1
Todaydent 1
Travjanoj balzam 1
Ultradent 1
Uterkam Tea Tree 1
Vademecum 1
Velym 1
Vitex 1
Vivi My 1
Weleda 1
Welsmile 1
White Glo advantage 1
WhiteWash 1
WOOM 1
Xoc 1
Young living 1
Zemciug “Blue Pearl” 1
Zendium 1
Zubnaja pasta otbelivanije 1
Zubnaja pasta Semejnaja 1
Zubnoj pasta “Propolis” 1
Zubnoy dozor Apple 1
Zylia 1

Different ways of presenting the differences between groups (not for publication)

Check groups of F concentration by age


df %>% 
   filter(f_concentration_group != "NA") %>%
  ggplot(aes(x = `f_concentration_group`, y = `age`)) +
  
  geom_jitter(alpha = .05) +
  
  geom_boxplot(alpha = .8) +
  coord_flip() + 
  theme_minimal() + 
  labs(title = "Age distribution per toothpaste F concentration type", 
       x = "Fluoride concentration", 
       y = "Age (years)") 

df %>%
  filter(f_concentration_group != "NA") %>%
  group_by(age_group, f_concentration_group) %>%
  summarise(count = n()) %>%
  mutate(perc = count / sum(count)) %>%
  ggplot(aes(x = age_group, y = perc * 100, fill = f_concentration_group)) +
  geom_bar(stat = "identity") +
  facet_grid(f_concentration_group ~ .) +
  theme_minimal() +
  labs(title = "Fluoride Concentration toothpaste by age group",
       y = "Percentage",
       x = "Age group",
       fill = "Fluoride concentration") +
  scale_fill_brewer(palette = "Spectral")
`summarise()` has grouped output by 'age_group'. You can override using the `.groups` argument.

df %>% 
  filter(f_concentration_group != "NA") %>% 
  group_by(age_group, f_concentration_group) %>% 
  summarise(count = n()) %>% 
  mutate(perc=count/sum(count)) %>% 
  ggplot(aes(x = age_group, y = perc*100, fill = f_concentration_group)) +
  geom_bar(stat="identity") + 
  facet_wrap(~f_concentration_group) + 
  theme_minimal() + 
  labs(title = "Fluoride Concentration toothpaste by age group", 
       y = "Percentage", 
       x = "Age group", 
       fill = "Fluoride concentration") + 
  coord_flip() +
  scale_fill_brewer(palette = "Spectral")
`summarise()` has grouped output by 'age_group'. You can override using the `.groups` argument.

df %>%
  filter(f_concentration_group != "NA") %>% 
  group_by(lives_in, f_concentration_group) %>%
  summarise(count = n()) %>%
  mutate(perc = count / sum(count)) %>%
  ggplot(aes(x = lives_in, y = perc * 100, fill = f_concentration_group)) +
  geom_bar(stat = "identity") +
  facet_wrap( ~ f_concentration_group) +
  theme_minimal() +
  labs(title = "Fluoride Concentration toothpaste by location",
       y = "Percentage",
       x = "Location",
       fill = "Fluoride concentration") +
  coord_flip() +
  scale_fill_brewer(palette = "Spectral")
`summarise()` has grouped output by 'lives_in'. You can override using the `.groups` argument.

Codebook

# df %>% 
#  select(-c(family_code, nr_of_family_members)) %>% 
#  dataReporter::makeCodebook()
