library(tidyverse)
library(stargazer)
library(ggthemes)
library(fifer)
options(digits = 3)
# df <- read.csv2("df.csv", header = T, sep = ";")
df <- read_csv2("df.csv", na = c("", " ", "NA"))
df <- df[, !duplicated(colnames(df))]
df <- df %>%
select(-c(C37V:Trauma_37))
Error in select(., -c(C37V:Trauma_37)) :
unused argument (-c(C37V:Trauma_37))
dim(df)
[1] 2138 416
df %>%
group_by(RegionName, `1_gender`) %>%
summarise(n=n()) %>%
spread(`1_gender`, n)
addmargins(table(df$RegionName, df$`1_gender`))
F M Sum
Kurzeme 145 144 289
Latgale 157 137 294
Pieriga 198 215 413
Riga 292 353 645
Vidzeme 86 116 202
Zemgale 153 142 295
Sum 1031 1107 2138
df %>%
group_by(`2_Live_in`, FAS) %>%
summarise( n = n() ) %>%
spread(FAS, n, fill = 0)
df %>%
group_by(`1_gender`, FAS) %>%
summarise( n = n()) %>%
spread(FAS, n)
nrow(subset(df, D1T > 0))
[1] 2023
nrow(subset(df,D3T > 0))
[1] 937
nrow(subset(df,D5T > 0))
[1] 470
nrow(subset(df, FT > 0))
[1] 1411
nrow(subset(df, MT > 0))
[1] 36
nrow(subset(df, D1MFT > 0))
[1] 2105
nrow(subset(df, D3MFT > 0))
[1] 1705
df %>%
summarise_each(funs(mean, median, sd) , D1S:Sealants)
df %>%
summarise_each(funs(quantile(., probs = 0.25)) , D1S:Sealants)
df %>%
summarise_each(funs(quantile(., probs = 0.75)) , D1S:Sealants)
Recode D3MFT in 0, 1
df <- df %>%
mutate( bin.D3T = ifelse(D3MFT == 0, 0, 1))
df$bin.D3T <- as.factor(df$bin.D3T)
df %>%
group_by(`1_gender`) %>%
summarise_each(funs(mean) , D1T:Sealants)
t.test(df$D1T~df$`1_gender`)
Welch Two Sample t-test
data: df$D1T by df$`1_gender`
t = -1, df = 2000, p-value = 0.2
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
-0.59 0.14
sample estimates:
mean in group F mean in group M
5.74 5.96
t.test(df$D3T~df$`1_gender`)
Welch Two Sample t-test
data: df$D3T by df$`1_gender`
t = 1, df = 2000, p-value = 0.3
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
-0.0537 0.1744
sample estimates:
mean in group F mean in group M
0.919 0.858
t.test(df$D5T~df$`1_gender`)
Welch Two Sample t-test
data: df$D5T by df$`1_gender`
t = -0.5, df = 2000, p-value = 0.6
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
-0.112 0.066
sample estimates:
mean in group F mean in group M
0.400 0.423
t.test(df$FT~df$`1_gender`)
Welch Two Sample t-test
data: df$FT by df$`1_gender`
t = 2, df = 2000, p-value = 0.08
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
-0.0227 0.3572
sample estimates:
mean in group F mean in group M
2.12 1.95
t.test(df$MT~df$`1_gender`)
Welch Two Sample t-test
data: df$MT by df$`1_gender`
t = 0.8, df = 2000, p-value = 0.4
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
-0.00801 0.02024
sample estimates:
mean in group F mean in group M
0.0233 0.0172
t.test(df$D1MFT~df$`1_gender`)
Welch Two Sample t-test
data: df$D1MFT by df$`1_gender`
t = -0.06, df = 2000, p-value = 1
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
-0.469 0.440
sample estimates:
mean in group F mean in group M
9.20 9.21
t.test(df$D3MFT~df$`1_gender`)
Welch Two Sample t-test
data: df$D3MFT by df$`1_gender`
t = 2, df = 2000, p-value = 0.1
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
-0.0439 0.4650
sample estimates:
mean in group F mean in group M
3.46 3.25
t.test(df$D5MFT~df$`1_gender`)
Welch Two Sample t-test
data: df$D5MFT by df$`1_gender`
t = 1, df = 2000, p-value = 0.2
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
-0.061 0.361
sample estimates:
mean in group F mean in group M
2.54 2.39
t.test(df$D1S~df$`1_gender`)
Welch Two Sample t-test
data: df$D1S by df$`1_gender`
t = -2, df = 2000, p-value = 0.1
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
-1.610 0.161
sample estimates:
mean in group F mean in group M
12.3 13.0
t.test(df$D3S~df$`1_gender`)
Welch Two Sample t-test
data: df$D3S by df$`1_gender`
t = 0.4, df = 2000, p-value = 0.7
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
-0.113 0.162
sample estimates:
mean in group F mean in group M
1.03 1.00
t.test(df$D5S~df$`1_gender`)
Welch Two Sample t-test
data: df$D5S by df$`1_gender`
t = -1, df = 2000, p-value = 0.3
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
-0.2595 0.0761
sample estimates:
mean in group F mean in group M
0.572 0.664
t.test(df$FS~df$`1_gender`)
Welch Two Sample t-test
data: df$FS by df$`1_gender`
t = 0.5, df = 2000, p-value = 0.6
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
-0.249 0.436
sample estimates:
mean in group F mean in group M
3.28 3.19
t.test(df$MS~df$`1_gender`)
Welch Two Sample t-test
data: df$MS by df$`1_gender`
t = 0.6, df = 2000, p-value = 0.5
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
-0.0480 0.0901
sample estimates:
mean in group F mean in group M
0.1096 0.0885
t.test(df$D1MFS~df$`1_gender`)
Welch Two Sample t-test
data: df$D1MFS by df$`1_gender`
t = -1, df = 2000, p-value = 0.2
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
-1.789 0.435
sample estimates:
mean in group F mean in group M
17.2 17.9
t.test(df$D3MFS~df$`1_gender`)
Welch Two Sample t-test
data: df$D3MFS by df$`1_gender`
t = 0.2, df = 2000, p-value = 0.8
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
-0.428 0.522
sample estimates:
mean in group F mean in group M
4.99 4.95
t.test(df$D5MFS~df$`1_gender`)
Welch Two Sample t-test
data: df$D5MFS by df$`1_gender`
t = 0.1, df = 2000, p-value = 0.9
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
-0.395 0.441
sample estimates:
mean in group F mean in group M
3.96 3.94
t.test(df$Sealants~df$`1_gender`)
Welch Two Sample t-test
data: df$Sealants by df$`1_gender`
t = 0.4, df = 2000, p-value = 0.7
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
-0.0473 0.0695
sample estimates:
mean in group F mean in group M
0.148 0.137
df.long <- df %>%
gather("byTooth", "valueByTooth", D1T:D5MFT) %>%
gather("bySurface", "valueBySurface", D1S:Sealants)
df.long$byTooth <- ordered(df.long$byTooth, levels = c(
"D1T", "D3T", "D5T", "FT",
"MT", "D1MFT", "D3MFT", "D5MFT"))
df.long$bySurface <- ordered(df.long$bySurface, levels = c(
"D1S", "D3S", "D5S", "FS", "MS",
"D1MFS", "D3MFS", "D5MFS", "Sealants"))
df.long %>%
ggplot(aes(factor(byTooth), valueByTooth)) +
geom_boxplot(aes( fill = `1_gender` ) ) +
theme_minimal() +
labs(title = " ", x = " ", y = "Zobi", color = "Dzimums\n")+
ggsave("./plots/dmftByGender.png", width=8, height=6, dpi=250)
df.long %>%
ggplot(aes(factor(bySurface), valueBySurface)) +
geom_boxplot(aes(fill = factor(`1_gender`))) +
theme_minimal() +
labs(title = " ", x = " ", y = "Zobi", color = "Dzimums\n") +
ggsave("./plots/dmfsByGender.png", width=8, height=6, dpi=250)
df %>%
group_by(RegionName, bin.D3T) %>%
summarise(n=n()) %>%
spread(bin.D3T, n)
regionxD3T <- table(df$RegionName, df$bin.D3T)
chisq.test(regionxD3T)
Pearson's Chi-squared test
data: regionxD3T
X-squared = 20, df = 5, p-value = 0.005
chisq.post.hoc(regionxD3T)
Adjusted p-values used the fdr method.
df %>%
group_by(RegionName) %>%
summarise_each(funs(mean) , D1T:Sealants)
summary(aov(df$D3MFT~df$RegionName))
Df Sum Sq Mean Sq F value Pr(>F)
df$RegionName 5 240 48.0 5.38 6.3e-05 ***
Residuals 2132 19028 8.9
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
TukeyHSD(aov(df$D3MFT~df$RegionName))
Tukey multiple comparisons of means
95% family-wise confidence level
Fit: aov(formula = df$D3MFT ~ df$RegionName)
$`df$RegionName`
diff lwr upr p adj
Latgale-Kurzeme 0.8137 0.10785 1.5195 0.013
Pieriga-Kurzeme 0.2460 -0.40754 0.8994 0.892
Riga-Kurzeme 0.1520 -0.45121 0.7551 0.980
Vidzeme-Kurzeme 0.9262 0.14470 1.7076 0.010
Zemgale-Kurzeme 0.7602 0.05499 1.4655 0.026
Pieriga-Latgale -0.5677 -1.21796 0.0825 0.127
Riga-Latgale -0.6617 -1.26135 -0.0621 0.021
Vidzeme-Latgale 0.1125 -0.66625 0.8912 0.998
Zemgale-Latgale -0.0535 -0.75567 0.6488 1.000
Riga-Pieriga -0.0940 -0.63101 0.4430 0.996
Vidzeme-Pieriga 0.6802 -0.05140 1.4118 0.086
Zemgale-Pieriga 0.5143 -0.13529 1.1639 0.212
Vidzeme-Riga 0.7742 0.08717 1.4613 0.017
Zemgale-Riga 0.6083 0.00936 1.2072 0.044
Zemgale-Vidzeme -0.1659 -0.94413 0.6123 0.990
df.long %>%
ggplot(aes(factor(bySurface), valueBySurface)) +
geom_boxplot(aes(fill = factor(RegionName))) +
theme_minimal() +
labs(title = " ", x = " ", y = "Zobi", color = "Dzimums\n") +
ggsave("./plots/dmfsByRegion.png", width=8, height=6, dpi=250)
TukeyHSD(aov(df$D3MFT~df$RegionName))
Tukey multiple comparisons of means
95% family-wise confidence level
Fit: aov(formula = df$D3MFT ~ df$RegionName)
$`df$RegionName`
diff lwr upr p adj
Latgale-Kurzeme 0.8137 0.10785 1.5195 0.013
Pieriga-Kurzeme 0.2460 -0.40754 0.8994 0.892
Riga-Kurzeme 0.1520 -0.45121 0.7551 0.980
Vidzeme-Kurzeme 0.9262 0.14470 1.7076 0.010
Zemgale-Kurzeme 0.7602 0.05499 1.4655 0.026
Pieriga-Latgale -0.5677 -1.21796 0.0825 0.127
Riga-Latgale -0.6617 -1.26135 -0.0621 0.021
Vidzeme-Latgale 0.1125 -0.66625 0.8912 0.998
Zemgale-Latgale -0.0535 -0.75567 0.6488 1.000
Riga-Pieriga -0.0940 -0.63101 0.4430 0.996
Vidzeme-Pieriga 0.6802 -0.05140 1.4118 0.086
Zemgale-Pieriga 0.5143 -0.13529 1.1639 0.212
Vidzeme-Riga 0.7742 0.08717 1.4613 0.017
Zemgale-Riga 0.6083 0.00936 1.2072 0.044
Zemgale-Vidzeme -0.1659 -0.94413 0.6123 0.990
TukeyHSD(aov(df$D3MFS~df$RegionName))
Tukey multiple comparisons of means
95% family-wise confidence level
Fit: aov(formula = df$D3MFS ~ df$RegionName)
$`df$RegionName`
diff lwr upr p adj
Latgale-Kurzeme 1.0610 -0.2634 2.385 0.200
Pieriga-Kurzeme 0.7550 -0.4711 1.981 0.494
Riga-Kurzeme 0.5858 -0.5459 1.718 0.679
Vidzeme-Kurzeme 1.8483 0.3820 3.315 0.004
Zemgale-Kurzeme 0.8296 -0.4937 2.153 0.474
Pieriga-Latgale -0.3060 -1.5260 0.914 0.980
Riga-Latgale -0.4752 -1.6003 0.650 0.835
Vidzeme-Latgale 0.7873 -0.6739 2.248 0.640
Zemgale-Latgale -0.2314 -1.5490 1.086 0.996
Riga-Pieriga -0.1692 -1.1768 0.838 0.997
Vidzeme-Pieriga 1.0933 -0.2794 2.466 0.206
Zemgale-Pieriga 0.0746 -1.1442 1.293 1.000
Vidzeme-Riga 1.2625 -0.0266 2.552 0.059
Zemgale-Riga 0.2438 -0.8800 1.368 0.990
Zemgale-Vidzeme -1.0187 -2.4789 0.441 0.348
df.long %>%
ggplot(aes(factor(byTooth), valueByTooth)) +
geom_boxplot(aes(fill = factor(RegionName))) +
theme_minimal() +
labs(title = " ", x = " ", y = "Zobi", color = "Dzimums\n") +
ggsave("./plots/dmftByRegion.png", width=8, height=6, dpi=250)
chisq.test(df$bin.D3T, df$FAS_cat)
df %>%
group_by(FAS_cat) %>%
summarise_each(funs(mean) , D1T:Sealants)
TukeyHSD(aov(df$D3MFT~df$FAS_cat))
df %>%
group_by(`2_Live_in`, bin.D3T) %>%
summarise(n=n()) %>%
spread(bin.D3T, n)
chisq.test(df$`2_Live_in`, df$bin.D3T)
df %>%
group_by(`2_Live_in`) %>%
summarise_each(funs(mean) , D1T:Sealants)
TukeyHSD(aov(df$D3MFT~df$`2_Live_in`))
df$tsp_daily <- as.numeric(df$tsp_daily)
Elimino los outliers
outlierKD <- function(dt, var) {
var_name <- eval(substitute(var),eval(dt))
tot <- sum(!is.na(var_name))
na1 <- sum(is.na(var_name))
m1 <- mean(var_name, na.rm = T)
par(mfrow=c(2, 2), oma=c(0,0,3,0))
boxplot(var_name, main="With outliers")
hist(var_name, main="With outliers", xlab=NA, ylab=NA)
outlier <- boxplot.stats(var_name)$out
mo <- mean(outlier)
var_name <- ifelse(var_name %in% outlier, NA, var_name)
boxplot(var_name, main="Without outliers")
hist(var_name, main="Without outliers", xlab=NA, ylab=NA)
title("Outlier Check", outer=TRUE)
na2 <- sum(is.na(var_name))
cat("Outliers identified:", na2 - na1, "\n")
cat("Propotion (%) of outliers:", round((na2 - na1) / tot*100, 1), "\n")
cat("Mean of the outliers:", round(mo, 2), "\n")
m2 <- mean(var_name, na.rm = T)
cat("Mean without removing outliers:", round(m1, 2), "\n")
cat("Mean if we remove outliers:", round(m2, 2), "\n")
response <- readline(prompt="Do you want to remove outliers and to replace with NA? [yes/no]: ")
if(response == "y" | response == "yes"){
dt[as.character(substitute(var))] <- invisible(var_name)
assign(as.character(as.list(match.call())$dt), dt, envir = .GlobalEnv)
cat("Outliers successfully removed", "\n")
return(invisible(dt))
} else{
cat("Nothing changed", "\n")
return(invisible(var_name))
}
}
df.log$df.log$tsp_daily <- ifelse(df.log$df.log$tsp_daily < 5, 0 ,1)
Unknown column 'df.log'Unknown column 'df.log'
TukeyHSD(aov(df$tsp_daily ~ df$RegionName))
Tukey multiple comparisons of means
95% family-wise confidence level
Fit: aov(formula = df$tsp_daily ~ df$RegionName)
$`df$RegionName`
diff lwr upr p adj
Latgale-Kurzeme 1.1131 0.456 1.7706 0.000
Pieriga-Kurzeme 0.4413 -0.166 1.0483 0.302
Riga-Kurzeme 0.2259 -0.336 0.7878 0.862
Vidzeme-Kurzeme 0.0109 -0.712 0.7334 1.000
Zemgale-Kurzeme 0.3814 -0.274 1.0371 0.559
Pieriga-Latgale -0.6718 -1.276 -0.0680 0.019
Riga-Latgale -0.8872 -1.446 -0.3288 0.000
Vidzeme-Latgale -1.1021 -1.822 -0.3824 0.000
Zemgale-Latgale -0.7317 -1.384 -0.0789 0.018
Riga-Pieriga -0.2153 -0.713 0.2826 0.820
Vidzeme-Pieriga -0.4303 -1.104 0.2436 0.452
Zemgale-Pieriga -0.0599 -0.662 0.5420 1.000
Vidzeme-Riga -0.2150 -0.849 0.4186 0.928
Zemgale-Riga 0.1554 -0.401 0.7118 0.968
Zemgale-Vidzeme 0.3704 -0.348 1.0886 0.683
df[tsp_daily_cat] <- NA
Error in `[<-.data.frame`(`*tmp*`, tsp_daily_cat, value = NA) :
object 'tsp_daily_cat' not found
ggplot(df, aes(x=tsp_daily)) + geom_histogram()
sd(df$tsp_daily, na.rm = T)
df.log <- df
df.log$`8_Frequency_of_toothbrushing`[df.log$`8_Frequency_of_toothbrushing`=="Once per day"] <- "0"
df.log$`8_Frequency_of_toothbrushing`[df.log$`8_Frequency_of_toothbrushing`=="Two or more times per day"] <- "0"
df.log$`1_gender` <- ifelse(df.log$`1_gender` == "F", 0 ,1)
df.log$FAS_cat <- ifelse(df.log$FAS_cat == "High affluence", 0 ,1)
df.log$`8_Frequency_of_toothbrushing` <- ifelse(df.log$`8_Frequency_of_toothbrushing` == "0", 0 ,1)
df.log$`4_Frequency_of_dentist_visits_in_last_12_months` <- ifelse(df.log$`4_Frequency_of_dentist_visits_in_last_12_months` == "Two or more times", 0 ,1)
df.log$`7_Frequency_of_dental_hygienist_visits` <- ifelse(df.log$`7_Frequency_of_dental_hygienist_visits` == "Two or more times per year", 0 ,1)
df.log$`9_Usage_of_dental_floss` <- ifelse(df.log$`9_Usage_of_dental_floss` == "Yes", 0 ,1)
df.log$`9_Usage_of_mouth_wash` <- ifelse(df.log$`9_Usage_of_mouth_wash` == "Yes", 0 ,1)
df.log$`11_Usage_of_fluoride_supplements` <- ifelse(df.log$`11_Usage_of_fluoride_supplements` == "Yes, now", 0 ,1)
df.log$`13_Eating_habits_grouped` <- ifelse(df.log$`13_Eating_habits_grouped` == 1, 1 ,0)
df.log$`18_Frequency_of_smoking` <- ifelse(df.log$`18_Frequency_of_smoking` == "Never", 1 ,0)
df.log$tsp_daily <- ifelse(df.log$tsp_daily < 5, 0 ,1)
df.log$D1MFT <- ifelse(df.log$D1MFT > 2, 1 ,0) #menos que dos es "sin caries"
df.log$D3MFT <- ifelse(df.log$D3MFT > 2, 1 ,0)
df.log$D5MFT <- ifelse(df.log$D5MFT == 0, 0 ,1)
d1 <- glm(D1MFT ~
`1_gender` +
FAS_cat +
`8_Frequency_of_toothbrushing` +
`4_Frequency_of_dentist_visits_in_last_12_months` +
`7_Frequency_of_dental_hygienist_visits` +
`9_Usage_of_dental_floss` +
`9_Usage_of_mouth_wash` +
`11_Usage_of_fluoride_supplements` +
`13_Eating_habits_grouped` +
`18_Frequency_of_smoking` +
tsp_daily,
data = df.log,
family = binomial)
summary(d1)
Call:
glm(formula = D1MFT ~ `1_gender` + FAS_cat + `8_Frequency_of_toothbrushing` +
`4_Frequency_of_dentist_visits_in_last_12_months` + `7_Frequency_of_dental_hygienist_visits` +
`9_Usage_of_dental_floss` + `9_Usage_of_mouth_wash` + `11_Usage_of_fluoride_supplements` +
`13_Eating_habits_grouped` + `18_Frequency_of_smoking` +
tsp_daily, family = binomial, data = df.log)
Deviance Residuals:
Min 1Q Median 3Q Max
-2.8463 0.2680 0.3389 0.4147 0.6032
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) 2.75073 1.07449 2.560 0.0105 *
`1_gender` -0.10362 0.18718 -0.554 0.5799
FAS_cat -0.28642 0.25492 -1.124 0.2612
`8_Frequency_of_toothbrushing` 0.41041 0.28936 1.418 0.1561
`4_Frequency_of_dentist_visits_in_last_12_months` -0.94584 0.21374 -4.425 9.63e-06 ***
`7_Frequency_of_dental_hygienist_visits` 0.37377 0.20676 1.808 0.0706 .
`9_Usage_of_dental_floss` 0.16756 0.19820 0.845 0.3979
`9_Usage_of_mouth_wash` -0.19833 0.18573 -1.068 0.2856
`11_Usage_of_fluoride_supplements` -0.07959 0.74614 -0.107 0.9151
`13_Eating_habits_grouped` 0.22193 0.21989 1.009 0.3128
`18_Frequency_of_smoking` 0.47470 0.76250 0.623 0.5336
tsp_daily 0.32335 0.23933 1.351 0.1767
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 963.64 on 2011 degrees of freedom
Residual deviance: 933.33 on 2000 degrees of freedom
(126 observations deleted due to missingness)
AIC: 957.33
Number of Fisher Scoring iterations: 6
d2 <- glm(D1MFT ~
`1_gender` +
FAS_cat +
`8_Frequency_of_toothbrushing` +
`4_Frequency_of_dentist_visits_in_last_12_months` +
`7_Frequency_of_dental_hygienist_visits` +
`11_Usage_of_fluoride_supplements` +
`13_Eating_habits_grouped` +
`18_Frequency_of_smoking` +
tsp_daily,
data = df.log,
family = binomial)
summary(d2)
Call:
glm(formula = D1MFT ~ `1_gender` + FAS_cat + `8_Frequency_of_toothbrushing` +
`4_Frequency_of_dentist_visits_in_last_12_months` + `7_Frequency_of_dental_hygienist_visits` +
`11_Usage_of_fluoride_supplements` + `13_Eating_habits_grouped` +
`18_Frequency_of_smoking` + tsp_daily, family = binomial,
data = df.log)
Deviance Residuals:
Min 1Q Median 3Q Max
-2.7959 0.2787 0.3383 0.4143 0.5683
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) 2.68440 1.06914 2.511 0.0120 *
`1_gender` -0.09274 0.18645 -0.497 0.6189
FAS_cat -0.28502 0.25329 -1.125 0.2605
`8_Frequency_of_toothbrushing` 0.41969 0.28862 1.454 0.1459
`4_Frequency_of_dentist_visits_in_last_12_months` -0.93914 0.21291 -4.411 1.03e-05 ***
`7_Frequency_of_dental_hygienist_visits` 0.38704 0.20425 1.895 0.0581 .
`11_Usage_of_fluoride_supplements` -0.01283 0.74340 -0.017 0.9862
`13_Eating_habits_grouped` 0.21499 0.21928 0.980 0.3269
`18_Frequency_of_smoking` 0.45558 0.75954 0.600 0.5486
tsp_daily 0.33208 0.23915 1.389 0.1650
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 963.64 on 2011 degrees of freedom
Residual deviance: 935.11 on 2002 degrees of freedom
(126 observations deleted due to missingness)
AIC: 955.11
Number of Fisher Scoring iterations: 6
stargazer(d1, d2, type="text", digits=3,
dep.var.labels=c("Caries at D1 more than two ( = 1)"),
covariate.labels=c("Sex (male = 1)",
"FAS (Low = 1)",
"Freq Toothbrushing ( < once per week = 1)",
"Freq visit dentist ( < once per year = 1)",
"Freq visit hygienist ( < once per year = 1)",
"Dental floss (no use = 1)",
"Mouthwash (no use = 1)",
"Use of fluoride supplement (no use = 1)",
"Eating habits (high in sweet = 1)",
"More than one teaspoon in tea, coffee or cacao"),
out="./models/modelsd1-d2.txt")
the condition has length > 1 and only the first element will be usedthe condition has length > 1 and only the first element will be usednumber of rows of result is not a multiple of vector length (arg 2)
=================================================================================
Dependent variable:
----------------------------------
Caries at D1 more than two ( = 1)
(1) (2)
---------------------------------------------------------------------------------
Sex (male = 1) -0.104 -0.093
(0.187) (0.186)
FAS (Low = 1) -0.286 -0.285
(0.255) (0.253)
Freq Toothbrushing ( < once per week = 1) 0.410 0.420
(0.289) (0.289)
Freq visit dentist ( < once per year = 1) -0.946*** -0.939***
(0.214) (0.213)
Freq visit hygienist ( < once per year = 1) 0.374* 0.387*
(0.207) (0.204)
Dental floss (no use = 1) 0.168
(0.198)
Mouthwash (no use = 1) -0.198
(0.186)
Use of fluoride supplement (no use = 1) -0.080 -0.013
(0.746) (0.743)
Eating habits (high in sweet = 1) 0.222 0.215
(0.220) (0.219)
More than one teaspoon in tea, coffee or cacao 0.475 0.456
(0.763) (0.760)
tsp_daily 0.323 0.332
(0.239) (0.239)
Constant 2.751** 2.684**
(1.074) (1.069)
---------------------------------------------------------------------------------
Observations 2,012 2,012
Log Likelihood -466.664 -467.557
Akaike Inf. Crit. 957.329 955.114
=================================================================================
Note: *p<0.1; **p<0.05; ***p<0.01
d3 <- glm(D3MFT ~
`1_gender` +
FAS_cat +
`8_Frequency_of_toothbrushing` +
`4_Frequency_of_dentist_visits_in_last_12_months` +
`7_Frequency_of_dental_hygienist_visits` +
`9_Usage_of_dental_floss` +
`9_Usage_of_mouth_wash` +
`11_Usage_of_fluoride_supplements` +
`13_Eating_habits_grouped` +
`18_Frequency_of_smoking` +
tsp_daily,
data = df.log,
family = binomial)
summary(d3)
Call:
glm(formula = D3MFT ~ `1_gender` + FAS_cat + `8_Frequency_of_toothbrushing` +
`4_Frequency_of_dentist_visits_in_last_12_months` + `7_Frequency_of_dental_hygienist_visits` +
`9_Usage_of_dental_floss` + `9_Usage_of_mouth_wash` + `11_Usage_of_fluoride_supplements` +
`13_Eating_habits_grouped` + `18_Frequency_of_smoking` +
tsp_daily, family = binomial, data = df.log)
Deviance Residuals:
Min 1Q Median 3Q Max
-1.7257 -1.1872 0.8479 1.0903 1.5071
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) 0.18476 0.56145 0.329 0.742105
`1_gender` -0.18530 0.09369 -1.978 0.047956 *
FAS_cat 0.43178 0.11803 3.658 0.000254 ***
`8_Frequency_of_toothbrushing` 0.29642 0.13185 2.248 0.024567 *
`4_Frequency_of_dentist_visits_in_last_12_months` -0.62962 0.09652 -6.523 6.89e-11 ***
`7_Frequency_of_dental_hygienist_visits` 0.03897 0.10613 0.367 0.713470
`9_Usage_of_dental_floss` 0.07530 0.10031 0.751 0.452860
`9_Usage_of_mouth_wash` -0.26421 0.09215 -2.867 0.004143 **
`11_Usage_of_fluoride_supplements` -0.06927 0.36010 -0.192 0.847466
`13_Eating_habits_grouped` 0.06064 0.10611 0.571 0.567701
`18_Frequency_of_smoking` 0.17637 0.43084 0.409 0.682277
tsp_daily 0.15274 0.11203 1.363 0.172755
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 2765.6 on 2011 degrees of freedom
Residual deviance: 2688.5 on 2000 degrees of freedom
(126 observations deleted due to missingness)
AIC: 2712.5
Number of Fisher Scoring iterations: 4
d4 <- glm(D3MFT ~
`1_gender` +
FAS_cat +
`8_Frequency_of_toothbrushing` +
`4_Frequency_of_dentist_visits_in_last_12_months` +
`7_Frequency_of_dental_hygienist_visits` +
`9_Usage_of_mouth_wash` +
`11_Usage_of_fluoride_supplements` +
`13_Eating_habits_grouped` +
tsp_daily,
data = df.log,
family = binomial)
summary(d4)
Call:
glm(formula = D3MFT ~ `1_gender` + FAS_cat + `8_Frequency_of_toothbrushing` +
`4_Frequency_of_dentist_visits_in_last_12_months` + `7_Frequency_of_dental_hygienist_visits` +
`9_Usage_of_mouth_wash` + `11_Usage_of_fluoride_supplements` +
`13_Eating_habits_grouped` + tsp_daily, family = binomial,
data = df.log)
Deviance Residuals:
Min 1Q Median 3Q Max
-1.7439 -1.1901 0.8557 1.0931 1.5071
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) 0.36606 0.37495 0.976 0.328918
`1_gender` -0.18153 0.09312 -1.949 0.051247 .
FAS_cat 0.44127 0.11763 3.751 0.000176 ***
`8_Frequency_of_toothbrushing` 0.30240 0.13120 2.305 0.021171 *
`4_Frequency_of_dentist_visits_in_last_12_months` -0.62564 0.09623 -6.502 7.95e-11 ***
`7_Frequency_of_dental_hygienist_visits` 0.05080 0.10492 0.484 0.628295
`9_Usage_of_mouth_wash` -0.26068 0.09197 -2.834 0.004591 **
`11_Usage_of_fluoride_supplements` -0.04647 0.35906 -0.129 0.897026
`13_Eating_habits_grouped` 0.05530 0.10595 0.522 0.601733
tsp_daily 0.15530 0.11193 1.387 0.165304
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 2767.9 on 2013 degrees of freedom
Residual deviance: 2691.1 on 2004 degrees of freedom
(124 observations deleted due to missingness)
AIC: 2711.1
Number of Fisher Scoring iterations: 4
stargazer(d3, d4, type="text", digits=3,
dep.var.labels=c("Caries at D3 more than 2 ( = 1)"),
covariate.labels=c("Sex (male = 1)",
"FAS (Low = 1)",
"Freq Toothbrushing ( < once per week = 1)",
"Freq visit dentist ( < once per year = 1)",
"Freq visit hygienist ( < once per year = 1)",
"Dental floss (no use = 1)",
"Mouthwash (no use = 1)",
"Use of fluoride supplement (no use = 1)",
"Eating habits (high in sweet = 1)",
"More than one teaspoon in tea, coffee or cacao"),
out="./models/modelsD3-4.txt")
the condition has length > 1 and only the first element will be usedthe condition has length > 1 and only the first element will be usednumber of rows of result is not a multiple of vector length (arg 2)
===============================================================================
Dependent variable:
--------------------------------
Caries at D3 more than 2 ( = 1)
(1) (2)
-------------------------------------------------------------------------------
Sex (male = 1) -0.185** -0.182*
(0.094) (0.093)
FAS (Low = 1) 0.432*** 0.441***
(0.118) (0.118)
Freq Toothbrushing ( < once per week = 1) 0.296** 0.302**
(0.132) (0.131)
Freq visit dentist ( < once per year = 1) -0.630*** -0.626***
(0.097) (0.096)
Freq visit hygienist ( < once per year = 1) 0.039 0.051
(0.106) (0.105)
Dental floss (no use = 1) 0.075
(0.100)
Mouthwash (no use = 1) -0.264*** -0.261***
(0.092) (0.092)
Use of fluoride supplement (no use = 1) -0.069 -0.046
(0.360) (0.359)
Eating habits (high in sweet = 1) 0.061 0.055
(0.106) (0.106)
More than one teaspoon in tea, coffee or cacao 0.176
(0.431)
tsp_daily 0.153 0.155
(0.112) (0.112)
Constant 0.185 0.366
(0.561) (0.375)
-------------------------------------------------------------------------------
Observations 2,012 2,014
Log Likelihood -1,344.229 -1,345.547
Akaike Inf. Crit. 2,712.459 2,711.095
===============================================================================
Note: *p<0.1; **p<0.05; ***p<0.01
d5 <- glm(D5MFT ~
`1_gender` +
FAS_cat +
`8_Frequency_of_toothbrushing` +
`4_Frequency_of_dentist_visits_in_last_12_months` +
`7_Frequency_of_dental_hygienist_visits` +
`9_Usage_of_dental_floss` +
`9_Usage_of_mouth_wash` +
`11_Usage_of_fluoride_supplements` +
`13_Eating_habits_grouped` +
`18_Frequency_of_smoking` +
tsp_daily,
data = df.log,
family = binomial)
summary(d5)
Call:
glm(formula = D5MFT ~ `1_gender` + FAS_cat + `8_Frequency_of_toothbrushing` +
`4_Frequency_of_dentist_visits_in_last_12_months` + `7_Frequency_of_dental_hygienist_visits` +
`9_Usage_of_dental_floss` + `9_Usage_of_mouth_wash` + `11_Usage_of_fluoride_supplements` +
`13_Eating_habits_grouped` + `18_Frequency_of_smoking` +
tsp_daily, family = binomial, data = df.log)
Deviance Residuals:
Min 1Q Median 3Q Max
-2.2043 -1.3152 0.6430 0.8882 1.2015
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) 2.41279 0.72504 3.328 0.000875 ***
`1_gender` -0.10881 0.10451 -1.041 0.297798
FAS_cat 0.26598 0.12955 2.053 0.040057 *
`8_Frequency_of_toothbrushing` 0.33829 0.15095 2.241 0.025017 *
`4_Frequency_of_dentist_visits_in_last_12_months` -0.95454 0.11225 -8.503 < 2e-16 ***
`7_Frequency_of_dental_hygienist_visits` -0.05616 0.12083 -0.465 0.642060
`9_Usage_of_dental_floss` 0.14923 0.11181 1.335 0.181974
`9_Usage_of_mouth_wash` -0.25926 0.10307 -2.515 0.011889 *
`11_Usage_of_fluoride_supplements` -0.68725 0.49943 -1.376 0.168804
`13_Eating_habits_grouped` -0.08052 0.11697 -0.688 0.491211
`18_Frequency_of_smoking` -0.28543 0.52165 -0.547 0.584258
tsp_daily -0.11797 0.12257 -0.963 0.335793
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 2385.4 on 2011 degrees of freedom
Residual deviance: 2280.1 on 2000 degrees of freedom
(126 observations deleted due to missingness)
AIC: 2304.1
Number of Fisher Scoring iterations: 4
d6 <- glm(D5MFT ~
`1_gender` +
FAS_cat +
`8_Frequency_of_toothbrushing` +
`4_Frequency_of_dentist_visits_in_last_12_months` +
`7_Frequency_of_dental_hygienist_visits` +
`9_Usage_of_mouth_wash` +
`11_Usage_of_fluoride_supplements` +
`13_Eating_habits_grouped` +
tsp_daily,
data = df.log,
family = binomial)
summary(d5)
Call:
glm(formula = D5MFT ~ `1_gender` + FAS_cat + `8_Frequency_of_toothbrushing` +
`4_Frequency_of_dentist_visits_in_last_12_months` + `7_Frequency_of_dental_hygienist_visits` +
`9_Usage_of_dental_floss` + `9_Usage_of_mouth_wash` + `11_Usage_of_fluoride_supplements` +
`13_Eating_habits_grouped` + `18_Frequency_of_smoking` +
tsp_daily, family = binomial, data = df.log)
Deviance Residuals:
Min 1Q Median 3Q Max
-2.2043 -1.3152 0.6430 0.8882 1.2015
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) 2.41279 0.72504 3.328 0.000875 ***
`1_gender` -0.10881 0.10451 -1.041 0.297798
FAS_cat 0.26598 0.12955 2.053 0.040057 *
`8_Frequency_of_toothbrushing` 0.33829 0.15095 2.241 0.025017 *
`4_Frequency_of_dentist_visits_in_last_12_months` -0.95454 0.11225 -8.503 < 2e-16 ***
`7_Frequency_of_dental_hygienist_visits` -0.05616 0.12083 -0.465 0.642060
`9_Usage_of_dental_floss` 0.14923 0.11181 1.335 0.181974
`9_Usage_of_mouth_wash` -0.25926 0.10307 -2.515 0.011889 *
`11_Usage_of_fluoride_supplements` -0.68725 0.49943 -1.376 0.168804
`13_Eating_habits_grouped` -0.08052 0.11697 -0.688 0.491211
`18_Frequency_of_smoking` -0.28543 0.52165 -0.547 0.584258
tsp_daily -0.11797 0.12257 -0.963 0.335793
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 2385.4 on 2011 degrees of freedom
Residual deviance: 2280.1 on 2000 degrees of freedom
(126 observations deleted due to missingness)
AIC: 2304.1
Number of Fisher Scoring iterations: 4
stargazer(d5, d6, type="text", digits=3,
dep.var.labels=c("Caries at D5 different than 0 (= 1)"),
covariate.labels=c("Sex (male = 1)",
"FAS (Low = 1)",
"Freq Toothbrushing ( < once per week = 1)",
"Freq visit dentist ( < once per year = 1)",
"Freq visit hygienist ( < once per year = 1)",
"Dental floss (no use = 1)",
"Mouthwash (no use = 1)",
"Use of fluoride supplement (no use = 1)",
"Eating habits (high in sweet = 1)",
"More than one teaspoon in tea, coffee or cacao"),
out="./models/modelsD5-6.txt")
the condition has length > 1 and only the first element will be usedthe condition has length > 1 and only the first element will be usednumber of rows of result is not a multiple of vector length (arg 2)
===================================================================================
Dependent variable:
------------------------------------
Caries at D5 different than 0 (= 1)
(1) (2)
-----------------------------------------------------------------------------------
Sex (male = 1) -0.109 -0.094
(0.105) (0.104)
FAS (Low = 1) 0.266** 0.280**
(0.130) (0.129)
Freq Toothbrushing ( < once per week = 1) 0.338** 0.358**
(0.151) (0.150)
Freq visit dentist ( < once per year = 1) -0.955*** -0.946***
(0.112) (0.112)
Freq visit hygienist ( < once per year = 1) -0.056 -0.030
(0.121) (0.119)
Dental floss (no use = 1) 0.149
(0.112)
Mouthwash (no use = 1) -0.259** -0.256**
(0.103) (0.103)
Use of fluoride supplement (no use = 1) -0.687 -0.651
(0.499) (0.498)
Eating habits (high in sweet = 1) -0.081 -0.091
(0.117) (0.117)
More than one teaspoon in tea, coffee or cacao -0.285
(0.522)
tsp_daily -0.118 -0.115
(0.123) (0.122)
Constant 2.413*** 2.150***
(0.725) (0.513)
-----------------------------------------------------------------------------------
Observations 2,012 2,014
Log Likelihood -1,140.061 -1,141.603
Akaike Inf. Crit. 2,304.122 2,303.205
===================================================================================
Note: *p<0.1; **p<0.05; ***p<0.01