# Attach the packages this script relies on. library() stops with an error
# when a package is missing, whereas require() only warns and returns FALSE,
# which lets the script fail later with a less obvious message.
library(tidyverse)
library(stargazer)
library(ggthemes)
# Keep only the first occurrence of each column name, then remove the
# contiguous column range C17V..Trauma_37 and report the remaining dimensions.
df <- select(
  df[, !duplicated(names(df))],
  -(C17V:Trauma_37)
)
dim(df)
# Cross-tabulate RegionName against `1_gender` and append row/column totals.
table(df$RegionName, df$`1_gender`) %>%
  addmargins()
F M Sum
Kurzeme 145 144 289
Latgale 157 137 294
Pieriga 198 215 413
Riga 292 353 645
Vidzeme 86 116 202
Zemgale 153 142 295
Sum 1031 1107 2138
# Number of rows with D1T greater than zero (rows where D1T is NA are not counted).
sum(df[["D1T"]] > 0, na.rm = TRUE)
[1] 2023
# Number of rows with D3T greater than zero (rows where D3T is NA are not counted).
sum(df[["D3T"]] > 0, na.rm = TRUE)
[1] 937
# Number of rows with D5T greater than zero (rows where D5T is NA are not counted).
sum(df[["D5T"]] > 0, na.rm = TRUE)
[1] 470
# Number of rows with FT greater than zero (rows where FT is NA are not counted).
sum(df[["FT"]] > 0, na.rm = TRUE)
[1] 1411
# Number of rows with MT greater than zero (rows where MT is NA are not counted).
sum(df[["MT"]] > 0, na.rm = TRUE)
[1] 36
# Number of rows with D1MFT greater than zero (rows where D1MFT is NA are not counted).
sum(df[["D1MFT"]] > 0, na.rm = TRUE)
[1] 2105
# Number of rows with D3MFT greater than zero (rows where D3MFT is NA are not counted).
sum(df[["D3MFT"]] > 0, na.rm = TRUE)
[1] 1705
# Overall descriptive statistics for every column from D1S through Sealants.
# summarise(across()) replaces summarise_each()/funs(), which are deprecated
# in dplyr.

# Mean, median and standard deviation; result columns are named
# <column>_<statistic> (e.g. D1S_mean, D1S_median, D1S_sd).
df %>%
  summarise(across(D1S:Sealants, list(mean = mean, median = median, sd = sd)))

# First quartile of each column (result keeps the original column names).
df %>%
  summarise(across(D1S:Sealants, ~ quantile(.x, probs = 0.25)))

# Third quartile of each column (result keeps the original column names).
df %>%
  summarise(across(D1S:Sealants, ~ quantile(.x, probs = 0.75)))
# Recode D3MFT into a binary indicator: 0 when D3MFT is 0, 1 otherwise
# Binary indicator derived from D3MFT, stored as a factor in bin.D3T:
# "0" when D3MFT equals 0, "1" for any other value (NA stays NA).
df$bin.D3T <- as.factor(ifelse(df[["D3MFT"]] == 0, 0, 1))
# Mean of every column from D1T through Sealants within each `1_gender` group.
# summarise(across()) replaces summarise_each()/funs(), which are deprecated
# in dplyr; the result keeps the original column names.
df %>%
  group_by(`1_gender`) %>%
  summarise(across(D1T:Sealants, mean))
# Welch two-sample t-test comparing mean D1T between the two `1_gender` groups.
t.test(df$D1T~df$`1_gender`)
Welch Two Sample t-test
data: df$D1T by df$`1_gender`
t = -1, df = 2000, p-value = 0.2
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
-0.59 0.14
sample estimates:
mean in group F mean in group M
5.74 5.96
# Welch two-sample t-test comparing mean D3T between the two `1_gender` groups.
t.test(df$D3T~df$`1_gender`)
Welch Two Sample t-test
data: df$D3T by df$`1_gender`
t = 1, df = 2000, p-value = 0.3
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
-0.0537 0.1744
sample estimates:
mean in group F mean in group M
0.919 0.858
# Welch two-sample t-test comparing mean D5T between the two `1_gender` groups.
t.test(df$D5T~df$`1_gender`)
Welch Two Sample t-test
data: df$D5T by df$`1_gender`
t = -0.5, df = 2000, p-value = 0.6
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
-0.112 0.066
sample estimates:
mean in group F mean in group M
0.400 0.423
# Welch two-sample t-test comparing mean FT between the two `1_gender` groups.
t.test(df$FT~df$`1_gender`)
Welch Two Sample t-test
data: df$FT by df$`1_gender`
t = 2, df = 2000, p-value = 0.08
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
-0.0227 0.3572
sample estimates:
mean in group F mean in group M
2.12 1.95
# Welch two-sample t-test comparing mean MT between the two `1_gender` groups.
t.test(df$MT~df$`1_gender`)
Welch Two Sample t-test
data: df$MT by df$`1_gender`
t = 0.8, df = 2000, p-value = 0.4
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
-0.00801 0.02024
sample estimates:
mean in group F mean in group M
0.0233 0.0172
# Welch two-sample t-test comparing mean D1MFT between the two `1_gender` groups.
t.test(df$D1MFT~df$`1_gender`)
Welch Two Sample t-test
data: df$D1MFT by df$`1_gender`
t = -0.06, df = 2000, p-value = 1
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
-0.469 0.440
sample estimates:
mean in group F mean in group M
9.20 9.21
# Welch two-sample t-test comparing mean D3MFT between the two `1_gender` groups.
t.test(df$D3MFT~df$`1_gender`)
Welch Two Sample t-test
data: df$D3MFT by df$`1_gender`
t = 2, df = 2000, p-value = 0.1
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
-0.0439 0.4650
sample estimates:
mean in group F mean in group M
3.46 3.25
# Welch two-sample t-test comparing mean D5MFT between the two `1_gender` groups.
t.test(df$D5MFT~df$`1_gender`)
Welch Two Sample t-test
data: df$D5MFT by df$`1_gender`
t = 1, df = 2000, p-value = 0.2
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
-0.061 0.361
sample estimates:
mean in group F mean in group M
2.54 2.39
# Welch two-sample t-test comparing mean D1S between the two `1_gender` groups.
t.test(df$D1S~df$`1_gender`)
Welch Two Sample t-test
data: df$D1S by df$`1_gender`
t = -2, df = 2000, p-value = 0.1
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
-1.610 0.161
sample estimates:
mean in group F mean in group M
12.3 13.0
# Welch two-sample t-test comparing mean D3S between the two `1_gender` groups.
t.test(df$D3S~df$`1_gender`)
Welch Two Sample t-test
data: df$D3S by df$`1_gender`
t = 0.4, df = 2000, p-value = 0.7
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
-0.113 0.162
sample estimates:
mean in group F mean in group M
1.03 1.00
# Welch two-sample t-test comparing mean D5S between the two `1_gender` groups.
t.test(df$D5S~df$`1_gender`)
Welch Two Sample t-test
data: df$D5S by df$`1_gender`
t = -1, df = 2000, p-value = 0.3
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
-0.2595 0.0761
sample estimates:
mean in group F mean in group M
0.572 0.664
# Welch two-sample t-test comparing mean FS between the two `1_gender` groups.
t.test(df$FS~df$`1_gender`)
Welch Two Sample t-test
data: df$FS by df$`1_gender`
t = 0.5, df = 2000, p-value = 0.6
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
-0.249 0.436
sample estimates:
mean in group F mean in group M
3.28 3.19
# Welch two-sample t-test comparing mean MS between the two `1_gender` groups.
t.test(df$MS~df$`1_gender`)
Welch Two Sample t-test
data: df$MS by df$`1_gender`
t = 0.6, df = 2000, p-value = 0.5
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
-0.0480 0.0901
sample estimates:
mean in group F mean in group M
0.1096 0.0885
# Welch two-sample t-test comparing mean D1MFS between the two `1_gender` groups.
t.test(df$D1MFS~df$`1_gender`)
Welch Two Sample t-test
data: df$D1MFS by df$`1_gender`
t = -1, df = 2000, p-value = 0.2
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
-1.789 0.435
sample estimates:
mean in group F mean in group M
17.2 17.9
# Welch two-sample t-test comparing mean D3MFS between the two `1_gender` groups.
t.test(df$D3MFS~df$`1_gender`)
Welch Two Sample t-test
data: df$D3MFS by df$`1_gender`
t = 0.2, df = 2000, p-value = 0.8
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
-0.428 0.522
sample estimates:
mean in group F mean in group M
4.99 4.95
# Welch two-sample t-test comparing mean D5MFS between the two `1_gender` groups.
t.test(df$D5MFS~df$`1_gender`)
Welch Two Sample t-test
data: df$D5MFS by df$`1_gender`
t = 0.1, df = 2000, p-value = 0.9
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
-0.395 0.441
sample estimates:
mean in group F mean in group M
3.96 3.94
# Welch two-sample t-test comparing mean Sealants between the two `1_gender` groups.
t.test(df$Sealants~df$`1_gender`)
Welch Two Sample t-test
data: df$Sealants by df$`1_gender`
t = 0.4, df = 2000, p-value = 0.7
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
-0.0473 0.0695
sample estimates:
mean in group F mean in group M
0.148 0.137
# Reshape to long format in two passes: first stack the columns D1T..D5MFT
# into byTooth/valueByTooth, then stack the columns D1S..Sealants into
# bySurface/valueBySurface. The second pass runs on the already-stacked data,
# so each original row is repeated for every tooth/surface key combination.
df.long <- df %>%
  gather(key = "byTooth", value = "valueByTooth", D1T:D5MFT) %>%
  gather(key = "bySurface", value = "valueBySurface", D1S:Sealants)

# Turn both key columns into ordered factors so the indices keep this order.
df.long[["byTooth"]] <- factor(
  df.long[["byTooth"]],
  levels = c("D1T", "D3T", "D5T", "FT", "MT", "D1MFT", "D3MFT", "D5MFT"),
  ordered = TRUE
)
df.long[["bySurface"]] <- factor(
  df.long[["bySurface"]],
  levels = c(
    "D1S", "D3S", "D5S", "FS", "MS",
    "D1MFS", "D3MFS", "D5MFS", "Sealants"
  ),
  ordered = TRUE
)
# Box plots of valueByTooth for each byTooth index, split by `1_gender`.
# The legend is produced by the `fill` aesthetic, so its title is set with
# labs(fill = ...); the previous labs(color = ...) had no effect on it.
plot_by_tooth <- df.long %>%
  ggplot(aes(factor(byTooth), valueByTooth)) +
  geom_boxplot(aes(fill = `1_gender`)) +
  theme_minimal() +
  labs(title = " ", x = " ", y = "Zobi", fill = "Dzimums\n")

# Display the plot (the original top-level expression auto-printed it).
plot_by_tooth

# ggsave() is not a plot layer: adding it with `+` fails in current ggplot2
# and previously worked only because it saved last_plot() as a side effect.
# Call it separately and pass the plot explicitly. Make sure the target
# directory exists first, since ggsave() does not create it by default.
dir.create("./plots", showWarnings = FALSE, recursive = TRUE)
ggsave(
  "./plots/dmftByGender.png",
  plot = plot_by_tooth,
  width = 8, height = 6, dpi = 250
)
# Box plots of valueBySurface for each bySurface index, split by `1_gender`.
# The legend is produced by the `fill` aesthetic, so its title is set with
# labs(fill = ...); the previous labs(color = ...) had no effect on it.
# NOTE(review): the y-axis label "Zobi" is the same as on the tooth-level
# plot; confirm it is intended for the surface-level values.
plot_by_surface <- df.long %>%
  ggplot(aes(factor(bySurface), valueBySurface)) +
  geom_boxplot(aes(fill = factor(`1_gender`))) +
  theme_minimal() +
  labs(title = " ", x = " ", y = "Zobi", fill = "Dzimums\n")

# Display the plot (the original top-level expression auto-printed it).
plot_by_surface

# ggsave() is not a plot layer: adding it with `+` fails in current ggplot2
# and previously worked only because it saved last_plot() as a side effect.
# Call it separately and pass the plot explicitly. Make sure the target
# directory exists first, since ggsave() does not create it by default.
dir.create("./plots", showWarnings = FALSE, recursive = TRUE)
ggsave(
  "./plots/dmfsByGender.png",
  plot = plot_by_surface,
  width = 8, height = 6, dpi = 250
)