---
title: "02 EDA 2024 Tesis Rodriguez ODPUV"
author: "SU"
date: 2024-08-12
date-modified: last-modified
language:
  title-block-published: "CREATED"
  title-block-modified: "UPDATED"
format:
  html:
    toc: true
    toc-expand: 3
    code-fold: true
    code-tools: true
editor: visual
execute:
  echo: false
  cache: false
  warning: false
  message: false
---
# Packages
```{r}
pacman::p_load(tidyverse, # tools for data science
visdat, #NAs
viridis,
janitor, # for data cleaning and tables
here, # for reproducible research
gtsummary, # for tables
easystats, # check https://easystats.github.io/easystats/
scales,
lubridate
)
```
## Palettes
```{r}
# Unnamed seven-colour palette; passed to scale_fill_manual() by the ICDAS
# and PUFA bar charts further down, so colours are assigned by level order.
custom_palette <- c(
"#22a884", # Green
"#fde725", # Yellow
"#FFBF00", # Amber
# muted("#fde725", l = 30), # Darker version of Yellow
"#31688e", # Blue
"#443983", # Purple
"#8c2981", # Magenta-purple
"#de4968" # Pinkish red
)
```
Paleta para ICCMS
```{r}
# Named fill colours keyed by category label. The same vector is passed to
# scale_fill_manual() for both the ICCMS_severidad and ICCMS_actividad plots.
# NOTE(review): presumably "Sano/Inicial/Moderado/Severo" are severity labels
# and "Detenida/Activa" are activity labels -- confirm against the data.
color_palette <- c(
"A Sano" = "#22a884", # Green
"B Inicial" = "#fde725", # Yellow
"C Moderado" = "#31688e", # Blue
"D Severo" = "#de4968" , # Pinkish red
"B Detenida" = "#31688e", # Blue (same hex as "C Moderado")
"C Activa" = "#8c2981" # Magenta-purple
)
```
Paleta para Nyvad
```{r}
# Colours keyed by NYVAD score written as a string ("0" to "9"); used by the
# NYVAD plots through scale_fill_manual() / scale_color_manual().
nyvad_palette <- c(
"0" = "#addc30", # NYVAD 0
"1" = "#f9cb35", # NYVAD 1
"2" = "#f98e09", # NYVAD 2
"3" = "#e45a31", # NYVAD 3
"4" = "#ece7f2", # NYVAD 4
"5" = "#a6bddb", # NYVAD 5
"6" = "#2b8cbe", # NYVAD 6
"7" = "#bc3754", # NYVAD 7
"8" = "#8a226a", # NYVAD 8
"9" = "#57106e" # NYVAD 9
)
```
```{r}
# Make theme_minimal() the default ggplot2 theme for every plot in this document.
theme_set(theme_minimal())
```
# Dataset
```{r}
# Load the cleaned dataset from 01_clean_data/df.csv, resolved from the project root.
df <- here("01_clean_data", "df.csv") |>
  read_csv()
```
Check completeness
```{r}
# df |>
# visdat::vis_dat()
```
```{r}
# Print a compact column-by-column overview of the dataset.
glimpse(df)
```
# Preparing for analysis
Ordering surfaces
```{r}
# Put the surface levels in the display order M, D, O, V, P/L.
df$Surface <- fct_relevel(df$Surface, "M", "D", "O", "V", "P/L")
```
Ordering dte
```{r}
# Display order for tooth codes (Dte): codes starting with 5-8 first, then
# codes starting with 1-4, each listed quadrant by quadrant.
dte_order <- c(
# Primary teeth
"5.5", "5.4", "5.3", "5.2", "5.1",
"6.1", "6.2", "6.3", "6.4", "6.5",
"7.5", "7.4", "7.3", "7.2", "7.1",
"8.1", "8.2", "8.3", "8.4", "8.5",
# Permanent teeth
"1.8", "1.7", "1.6", "1.5", "1.4", "1.3", "1.2", "1.1",
"2.1", "2.2", "2.3", "2.4", "2.5", "2.6", "2.7", "2.8",
"3.8", "3.7", "3.6", "3.5", "3.4", "3.3", "3.2", "3.1",
"4.1", "4.2", "4.3", "4.4", "4.5", "4.6", "4.7", "4.8"
)
# Convert Dte to text, then to a factor whose levels follow dte_order;
# !!! splices the vector into fct_relevel() as individual level names.
df <- df %>%
mutate(Dte = fct_relevel(as.character(Dte), !!!dte_order))
```
```{r}
# rm(dte_order)
```
Add a new column for Primario/Permanente
```{r}
# Classify each tooth by the first character of its code: codes starting
# with 5, 6, 7 or 8 are "Primario"; anything else (including a missing
# Dte) falls through to "Permanente".
df <- df |>
  mutate(
    Denticion = case_when(
      str_detect(as.character(Dte), "^[5-8]") ~ "Primario",
      TRUE ~ "Permanente"
    )
  )
```
```{r}
# Order the dentition factor so "Primario" comes before "Permanente".
df$Denticion <- fct_relevel(df$Denticion, "Primario", "Permanente")
```
## Highest ICDAS per surface per Dte per ID
```{r}
# Highest ICDAS score recorded for each tooth (Dte) of each child (ID),
# repeated on every row of that tooth.
df <- df %>%
  group_by(ID, Dte) %>%
  mutate(
    # max(na.rm = TRUE) over an all-NA group returns -Inf with a warning;
    # keep such teeth as NA instead.
    `Highest ICDAS` = if (all(is.na(ICDAS))) NA else max(ICDAS, na.rm = TRUE)
  ) %>%
  ungroup()
```
## CEO/ceo
```{r}
# Step 1: highest ICDAS per tooth (NA when the tooth has no ICDAS at all).
# Step 2: code every row with the first rule that matches, in this order:
#   "P" when Restauracion is exactly "Rest - Rest",
#   "O" when NYVAD is 7,
#   "C" when the tooth's highest ICDAS is above 2,
#   ""  otherwise (also when the compared values are missing).
df <- df |>
  group_by(ID, Dte) |>
  mutate(
    Highest_ICDAS = if (all(is.na(ICDAS))) NA else max(ICDAS, na.rm = TRUE)
  ) |>
  ungroup() |>
  mutate(
    CEO_D = case_when(
      Restauracion == "Rest - Rest" ~ "P",
      NYVAD == 7 ~ "O",
      Highest_ICDAS > 2 ~ "C",
      TRUE ~ ""
    )
  )
```
Now by Dte
```{r}
# Collapse the per-row codes to one code per tooth, with priority
# "P" > "C" > "O"; a tooth with none of them keeps "".
df <- df |>
  group_by(ID, Dte) |>
  mutate(
    # intersect() keeps the order of its first argument, so element 1 is the
    # highest-priority code present in the tooth (NA when none is present).
    CEO_D = coalesce(base::intersect(c("P", "C", "O"), CEO_D)[1], "")
  ) |>
  ungroup()
```
```{r}
# Turn the tooth-level code into three 0/1 indicator columns (P, C, O),
# then drop the temporary CEO_D column.
df <- df |>
  group_by(ID, Dte) |>
  mutate(
    P = as.numeric(any(CEO_D == "P", na.rm = TRUE)),
    C = as.numeric(any(CEO_D == "C", na.rm = TRUE)),
    O = as.numeric(any(CEO_D == "O", na.rm = TRUE))
  ) |>
  ungroup() |>
  select(-CEO_D)
```
```{r}
# Move column C so the indicator columns read C, P, ...
df <- relocate(df, C, .before = P)
```
## Teeth per child
```{r}
# One row per child with the number of distinct teeth (Dte) recorded.
df_dte_count_per_id <- df |>
  distinct(ID, Dte) |>
  count(ID, name = "total_dte")
```
```{r}
# Attach each child's tooth count to every row of df as `total_dte`.
df <- left_join(x = df, y = df_dte_count_per_id, by = "ID")
```
```{r}
# The lookup table is no longer needed once it has been joined into df.
rm(list = "df_dte_count_per_id")
```
# How many children?
```{r}
# Total number of distinct children (ID) in the dataset.
tibble(num_children = n_distinct(df$ID)) |>
  knitr::kable()
```
## Sexo
```{r}
# Number of distinct children per value of Sexo.
df |>
  distinct(Sexo, ID) |>
  count(Sexo, name = "num_children") |>
  knitr::kable()
```
## Edad
```{r}
# Number of distinct children per age (Edad), shown as a table.
df |>
  distinct(Edad, ID) |>
  count(Edad, name = "num_children") |>
  knitr::kable()
```
```{r}
# Histogram of children by age, one-year bins. Each (ID, Edad) pair is
# counted once so repeated rows per child do not inflate the bars.
df |>
  distinct(ID, Edad) |>
  ggplot(aes(x = Edad)) +
  geom_histogram(binwidth = 1) +
  # Ask for roughly five "pretty" y-axis breaks.
  scale_y_continuous(breaks = scales::pretty_breaks(n = 5)) +
  theme_minimal() +
  labs(
    title = "Distribution of Unique Children by Age",
    x = "Age",
    y = "Number of Children"
  )
```
## Curso
```{r}
# Number of distinct children per Curso, sorted by Curso.
df |>
  distinct(Curso, ID) |>
  count(Curso, name = "num_children") |>
  arrange(Curso) |>
  knitr::kable()
```
# ICDAS
## Highest ICDAS per tooth
```{r}
# Jittered scatter of every recorded ICDAS value per primary tooth,
# coloured by surface.
df |>
  filter(Denticion == "Primario", !is.na(ICDAS)) |>
  ggplot(aes(x = Dte, y = ICDAS, color = Surface)) +
  geom_jitter(width = 0.2, height = 0.1, alpha = 0.3, size = 1.2) +
  scale_color_viridis_d(option = "D") +
  labs(
    title = "ICDAS Values per Tooth and Surface (Primario)",
    x = "Tooth (Dte)",
    y = "ICDAS Value",
    color = "Surface"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
```
```{r}
# Jittered scatter of every recorded ICDAS value per permanent tooth,
# coloured by surface.
df |>
  filter(Denticion == "Permanente", !is.na(ICDAS)) |>
  ggplot(aes(x = Dte, y = ICDAS, color = Surface)) +
  geom_jitter(width = 0.2, height = 0.1, alpha = 0.3, size = 1.2) +
  scale_color_viridis_d(option = "D") +
  labs(
    title = "ICDAS Values per Tooth and Surface (Permanente)",
    x = "Tooth (Dte)",
    y = "ICDAS Value",
    color = "Surface"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
```
## Primarios
```{r}
# Stacked bars: number of rows with each ICDAS score, per primary tooth.
df |>
  filter(!is.na(ICDAS), Denticion == "Primario") |>
  count(Dte, ICDAS, name = "count") |>
  ggplot(aes(x = Dte, y = count, fill = factor(ICDAS))) +
  geom_col() +
  scale_fill_manual(values = custom_palette) +
  labs(
    title = "Stacked Bar Plot of ICDAS Counts per Dte, PRIMARIOS",
    x = "Dte",
    y = "Count",
    fill = "ICDAS"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
```
```{r}
# 100% stacked bars: share of each ICDAS score within every primary tooth.
df |>
  filter(!is.na(ICDAS), Denticion == "Primario") |>
  count(Dte, ICDAS, name = "count") |>
  # position = "fill" rescales each bar to 1, so dividing by sum(count)
  # only changes the values by a common factor before that rescaling.
  ggplot(aes(x = Dte, y = count / sum(count), fill = factor(ICDAS), group = Dte)) +
  geom_col(position = "fill") +
  scale_fill_manual(values = custom_palette) +
  labs(
    title = "100% Stacked Bar Plot of ICDAS Counts per Dte, Primario",
    x = "Dte",
    y = "Proportion",
    fill = "ICDAS"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
```
## Permanente
```{r}
# Stacked bars: number of rows with each ICDAS score, per permanent tooth.
df |>
  filter(!is.na(ICDAS), Denticion == "Permanente") |>
  count(Dte, ICDAS, name = "count") |>
  ggplot(aes(x = Dte, y = count, fill = factor(ICDAS))) +
  geom_col() +
  scale_fill_manual(values = custom_palette) +
  labs(
    title = "Stacked Bar Plot of ICDAS Counts per Dte, Permanente",
    x = "Dte",
    y = "Count",
    fill = "ICDAS"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
```
```{r}
# 100% stacked bars: share of each ICDAS score within every permanent tooth.
df |>
  filter(!is.na(ICDAS), Denticion == "Permanente") |>
  count(Dte, ICDAS, name = "count") |>
  # position = "fill" rescales each bar to 1, so dividing by sum(count)
  # only changes the values by a common factor before that rescaling.
  ggplot(aes(x = Dte, y = count / sum(count), fill = factor(ICDAS), group = Dte)) +
  geom_col(position = "fill") +
  scale_fill_manual(values = custom_palette) +
  labs(
    title = "100% Stacked Bar Plot of ICDAS Counts per Dte, Permanente",
    x = "Dte",
    y = "Proportion",
    fill = "ICDAS"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
```
## ICDAS per surface
```{r}
# Heatmap of the mean ICDAS score for every tooth/surface pair in the
# primary dentition: teeth on the x-axis, surfaces on the y-axis.
df %>%
  filter(Denticion == "Primario") |>
  filter(!is.na(ICDAS)) %>%
  group_by(Dte, Surface) %>%
  summarise(mean_ICDAS = mean(ICDAS, na.rm = TRUE), .groups = "drop") %>%
  ggplot(aes(y = Surface, x = Dte, fill = mean_ICDAS)) +
  geom_tile() +
  labs(
    title = "Mean ICDAS Score per Surface per Primary Tooth",
    # Axis titles must follow the aes() mapping: x is Dte, y is Surface.
    x = "Tooth (Dte)",
    y = "Surface",
    fill = "Mean ICDAS"
  ) +
  scale_fill_gradient(low = "white", high = "darkred") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
```
```{r}
# Heatmap of the mean ICDAS score for every tooth/surface pair in the
# permanent dentition: teeth on the x-axis, surfaces on the y-axis.
df %>%
  filter(Denticion == "Permanente") |>
  filter(!is.na(ICDAS)) %>%
  group_by(Dte, Surface) %>%
  summarise(mean_ICDAS = mean(ICDAS, na.rm = TRUE), .groups = "drop") %>%
  ggplot(aes(y = Surface, x = Dte, fill = mean_ICDAS)) +
  geom_tile() +
  labs(
    title = "Mean ICDAS Score per Surface per Permanent Tooth",
    # Axis titles must follow the aes() mapping: x is Dte, y is Surface.
    x = "Tooth (Dte)",
    y = "Surface",
    fill = "Mean ICDAS"
  ) +
  scale_fill_gradient(low = "white", high = "darkred") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
```
# PUFA por diente
```{r}
# Table of row counts per tooth, one column per PUFA value (0 where absent).
df |>
  filter(!is.na(PUFA)) |>
  count(Dte, PUFA, name = "count") |>
  pivot_wider(names_from = PUFA, values_from = count, values_fill = 0) |>
  arrange(Dte) |>
  knitr::kable()
```
```{r}
# Stacked bars of PUFA counts per tooth; teeth are ordered on the x-axis by
# fct_reorder(Dte, count) rather than by the usual dte_order.
df |>
  filter(!is.na(PUFA)) |>
  count(Dte, PUFA, name = "count") |>
  ggplot(aes(x = fct_reorder(Dte, count), y = count, fill = factor(PUFA))) +
  geom_col() +
  scale_fill_manual(values = custom_palette) +
  labs(
    title = "Distribution of PUFA by Dte",
    x = "Tooth (Dte)",
    y = "Count",
    fill = "PUFA"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
```
# CPO-D
```{r}
# df %>%
# group_by(ID, Dte) %>%
# summarise(
# C_status = case_when(any(C == 1) ~ 1, TRUE ~ 0), # Mark if Dte has C
# P_status = case_when(any(P == 1) ~ 1, TRUE ~ 0), # Mark if Dte has P
# O_status = case_when(any(O == 1) ~ 1, TRUE ~ 0), # Mark if Dte has O
# .groups = 'drop'
# ) %>%
# group_by(ID) %>%
# summarise(
# Total_C = sum(C_status, na.rm = TRUE), # Count how many Dte have C
# Total_P = sum(P_status, na.rm = TRUE), # Count how many Dte have P
# Total_O = sum(O_status, na.rm = TRUE) # Count how many Dte have O
# ) %>%
# # Calculate the total sum
# mutate(Total_Sum = Total_C + Total_P + Total_O) %>%
# ungroup() |>
# knitr::kable()
```
```{r}
# Histogram of the per-child score in the primary dentition: the number of
# teeth flagged C, plus those flagged P, plus those flagged O.
df |>
  filter(Denticion == "Primario") |>
  # One row per child and tooth with 0/1 flags.
  group_by(ID, Dte) |>
  summarise(
    C_status = as.numeric(any(C == 1, na.rm = TRUE)),
    P_status = as.numeric(any(P == 1, na.rm = TRUE)),
    O_status = as.numeric(any(O == 1, na.rm = TRUE)),
    .groups = "drop"
  ) |>
  # One row per child with the number of flagged teeth of each kind.
  group_by(ID) |>
  summarise(
    Total_C = sum(C_status, na.rm = TRUE),
    Total_P = sum(P_status, na.rm = TRUE),
    Total_O = sum(O_status, na.rm = TRUE)
  ) |>
  mutate(Total_Sum = Total_C + Total_P + Total_O, .keep = "none") |>
  ggplot(aes(x = Total_Sum)) +
  geom_histogram(binwidth = 1) +
  labs(
    title = "Distribution of CPO(D) (Primario) Scores per Child",
    x = "CPO(D)",
    y = "Frequency"
  )
```
```{r}
# Histogram of the per-child score in the permanent dentition: the number of
# teeth flagged C, plus those flagged P, plus those flagged O.
df %>%
  filter(Denticion == "Permanente") |>
  # One row per child and tooth with 0/1 flags.
  group_by(ID, Dte) %>%
  summarise(
    C_status = case_when(any(C == 1) ~ 1, TRUE ~ 0),
    P_status = case_when(any(P == 1) ~ 1, TRUE ~ 0),
    O_status = case_when(any(O == 1) ~ 1, TRUE ~ 0),
    .groups = "drop"
  ) %>%
  # One row per child with the number of flagged teeth of each kind.
  group_by(ID) %>%
  summarise(
    Total_C = sum(C_status, na.rm = TRUE),
    Total_P = sum(P_status, na.rm = TRUE),
    Total_O = sum(O_status, na.rm = TRUE)
  ) %>%
  mutate(Total_Sum = Total_C + Total_P + Total_O) %>%
  ungroup() |>
  select(Total_Sum) |>
  ggplot(aes(x = Total_Sum)) +
  geom_histogram(binwidth = 1) +
  labs(
    # Title typo fixed: "Permaente" -> "Permanente".
    title = "Distribution of CPO(D) (Permanente) Scores per Child",
    x = "CPO(D)",
    y = "Frequency"
  )
```
```{r}
# df %>%
# group_by(ID, Dte) %>%
# summarise(
# C_status = case_when(any(C == 1) ~ 1, TRUE ~ 0), # Mark if Dte has C
# P_status = case_when(any(P == 1) ~ 1, TRUE ~ 0), # Mark if Dte has P
# O_status = case_when(any(O == 1) ~ 1, TRUE ~ 0), # Mark if Dte has O
# CPO = C_status + P_status + O_status, # Calculate the CPO for each Dte
# .groups = 'drop'
# ) %>%
# group_by(ID) %>%
# summarise(
# Mean_CPO = mean(CPO, na.rm = TRUE) # Calculate the mean CPO per Dte for each ID
# ) %>%
# ungroup() %>%
# knitr::kable()
```
```{r}
# Two-sided 95% confidence interval for the mean, based on the t distribution.
#
# x: numeric vector; missing values are dropped before anything is computed,
#    so the sample size, mean and standard deviation all refer to the same
#    observations.
# Returns a length-2 numeric vector c(lower, upper); both are NA when fewer
# than two non-missing values are available (no standard deviation exists).
ci_95 <- function(x) {
  x <- x[!is.na(x)]
  n <- length(x)
  if (n < 2) {
    return(c(NA_real_, NA_real_))
  }
  mean(x) + qt(c(0.025, 0.975), df = n - 1) * sd(x) / sqrt(n)
}
# Mean per-tooth CPO score across children (primary dentition), with a 95%
# confidence interval from ci_95() drawn as grey error bars.
df %>%
  filter(Denticion == "Primario") |>
  # One row per child and tooth: 0/1 flags and their sum.
  group_by(ID, Dte) %>%
  summarise(
    C_status = case_when(any(C == 1) ~ 1, TRUE ~ 0),
    P_status = case_when(any(P == 1) ~ 1, TRUE ~ 0),
    O_status = case_when(any(O == 1) ~ 1, TRUE ~ 0),
    CPO = C_status + P_status + O_status,
    .groups = "drop"
  ) |>
  # Summarise across children for each tooth.
  group_by(Dte) %>%
  summarise(
    Mean_CPO = mean(CPO, na.rm = TRUE),
    SD_CPO = sd(CPO, na.rm = TRUE),
    CI_low = ci_95(CPO)[1],
    CI_high = ci_95(CPO)[2],
    .groups = "drop"
  ) |>
  # No colour aesthetic is mapped, so no colour scale or colour legend title
  # is declared here.
  ggplot(aes(x = Dte, y = Mean_CPO)) +
  geom_point(size = 4, alpha = 0.8) +
  geom_errorbar(aes(ymin = CI_low, ymax = CI_high), width = 0.2, color = "grey") +
  labs(
    title = "Mean CPO Score per Tooth (Dte Primario) with 95% CI",
    x = "Tooth (Dte)",
    y = "Mean CPO Score"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
```
```{r}
# Mean per-tooth CPO score across children (permanent dentition), with a 95%
# confidence interval from ci_95() drawn as grey error bars.
df %>%
  filter(Denticion == "Permanente") |>
  # One row per child and tooth: 0/1 flags and their sum.
  group_by(ID, Dte) %>%
  summarise(
    C_status = case_when(any(C == 1) ~ 1, TRUE ~ 0),
    P_status = case_when(any(P == 1) ~ 1, TRUE ~ 0),
    O_status = case_when(any(O == 1) ~ 1, TRUE ~ 0),
    CPO = C_status + P_status + O_status,
    .groups = "drop"
  ) |>
  # Summarise across children for each tooth.
  group_by(Dte) %>%
  summarise(
    Mean_CPO = mean(CPO, na.rm = TRUE),
    SD_CPO = sd(CPO, na.rm = TRUE),
    CI_low = ci_95(CPO)[1],
    CI_high = ci_95(CPO)[2],
    .groups = "drop"
  ) |>
  # No colour aesthetic is mapped, so no colour scale or colour legend title
  # is declared here.
  ggplot(aes(x = Dte, y = Mean_CPO)) +
  geom_point(size = 4, alpha = 0.8) +
  geom_errorbar(aes(ymin = CI_low, ymax = CI_high), width = 0.2, color = "grey") +
  labs(
    title = "Mean CPO Score per Tooth (Dte Permanente) with 95% CI",
    x = "Tooth (Dte)",
    y = "Mean CPO Score"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
```
# ICCMS Severidad
```{r}
# 100% stacked bars: share of each ICCMS_severidad category per primary tooth.
df |>
  filter(Denticion == "Primario", !is.na(ICCMS_severidad)) |>
  count(Dte, ICCMS_severidad, name = "count") |>
  mutate(Dte = fct_relevel(Dte, !!!dte_order)) |>
  # Widen then lengthen again so every tooth has a row (count 0 if needed)
  # for every category.
  pivot_wider(names_from = ICCMS_severidad, values_from = count, values_fill = 0) |>
  arrange(Dte) |>
  pivot_longer(-Dte, names_to = "ICCMS_severidad", values_to = "count") |>
  ggplot(aes(x = Dte, y = count, fill = ICCMS_severidad)) +
  geom_col(position = "fill") +
  scale_fill_manual(values = color_palette) +
  scale_y_continuous(labels = scales::percent_format()) +
  labs(
    title = "ICCMS Severidad Distribution per Tooth (Dte Primario)",
    x = "Tooth (Dte)",
    y = "Proportion",
    fill = "ICCMS Severidad"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
```
```{r}
# 100% stacked bars: share of each ICCMS_severidad category per permanent tooth.
df |>
  filter(Denticion == "Permanente", !is.na(ICCMS_severidad)) |>
  count(Dte, ICCMS_severidad, name = "count") |>
  mutate(Dte = fct_relevel(Dte, !!!dte_order)) |>
  # Widen then lengthen again so every tooth has a row (count 0 if needed)
  # for every category.
  pivot_wider(names_from = ICCMS_severidad, values_from = count, values_fill = 0) |>
  arrange(Dte) |>
  pivot_longer(-Dte, names_to = "ICCMS_severidad", values_to = "count") |>
  ggplot(aes(x = Dte, y = count, fill = ICCMS_severidad)) +
  geom_col(position = "fill") +
  scale_fill_manual(values = color_palette) +
  scale_y_continuous(labels = scales::percent_format()) +
  labs(
    title = "ICCMS Severidad Distribution per Tooth (Dte Permanente)",
    x = "Tooth (Dte)",
    y = "Proportion",
    fill = "ICCMS Severidad"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
```
```{r}
# Table: rows per primary tooth in each ICCMS_severidad category (0 if none).
df |>
  filter(Denticion == "Primario", !is.na(ICCMS_severidad)) |>
  count(Dte, ICCMS_severidad, name = "count") |>
  mutate(Dte = fct_relevel(Dte, !!!dte_order)) |>
  pivot_wider(names_from = ICCMS_severidad, values_from = count, values_fill = 0) |>
  arrange(Dte) |>
  knitr::kable()
```
```{r}
# Table: rows per permanent tooth in each ICCMS_severidad category (0 if none).
df |>
  filter(Denticion == "Permanente", !is.na(ICCMS_severidad)) |>
  count(Dte, ICCMS_severidad, name = "count") |>
  mutate(Dte = fct_relevel(Dte, !!!dte_order)) |>
  pivot_wider(names_from = ICCMS_severidad, values_from = count, values_fill = 0) |>
  arrange(Dte) |>
  knitr::kable()
```
# ICCMS actividad
```{r}
# 100% stacked bars: share of each ICCMS_actividad category per primary tooth.
df |>
  filter(Denticion == "Primario", !is.na(ICCMS_actividad)) |>
  count(Dte, ICCMS_actividad, name = "count") |>
  mutate(Dte = fct_relevel(Dte, !!!dte_order)) |>
  # Widen then lengthen again so every tooth has a row (count 0 if needed)
  # for every category.
  pivot_wider(names_from = ICCMS_actividad, values_from = count, values_fill = 0) |>
  arrange(Dte) |>
  pivot_longer(-Dte, names_to = "ICCMS_actividad", values_to = "count") |>
  ggplot(aes(x = Dte, y = count, fill = ICCMS_actividad)) +
  geom_col(position = "fill") +
  scale_fill_manual(values = color_palette) +
  scale_y_continuous(labels = scales::percent_format()) +
  labs(
    title = "ICCMS Actividad Distribution per Tooth (Dte Primario)",
    x = "Tooth (Dte)",
    y = "Proportion",
    fill = "ICCMS Actividad"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
```
```{r}
# 100% stacked bars: share of each ICCMS_actividad category per permanent tooth.
df |>
  filter(Denticion == "Permanente", !is.na(ICCMS_actividad)) |>
  count(Dte, ICCMS_actividad, name = "count") |>
  mutate(Dte = fct_relevel(Dte, !!!dte_order)) |>
  # Widen then lengthen again so every tooth has a row (count 0 if needed)
  # for every category.
  pivot_wider(names_from = ICCMS_actividad, values_from = count, values_fill = 0) |>
  arrange(Dte) |>
  pivot_longer(-Dte, names_to = "ICCMS_actividad", values_to = "count") |>
  ggplot(aes(x = Dte, y = count, fill = ICCMS_actividad)) +
  geom_col(position = "fill") +
  scale_fill_manual(values = color_palette) +
  scale_y_continuous(labels = scales::percent_format()) +
  labs(
    title = "ICCMS Actividad Distribution per Tooth (Dte Permanente)",
    x = "Tooth (Dte)",
    y = "Proportion",
    fill = "ICCMS Actividad"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
```
```{r}
# Table: rows per primary tooth in each ICCMS_actividad category (0 if none).
df |>
  filter(Denticion == "Primario", !is.na(ICCMS_actividad)) |>
  count(Dte, ICCMS_actividad, name = "count") |>
  mutate(Dte = fct_relevel(Dte, !!!dte_order)) |>
  pivot_wider(names_from = ICCMS_actividad, values_from = count, values_fill = 0) |>
  arrange(Dte) |>
  knitr::kable()
```
```{r}
# Table: rows per permanent tooth in each ICCMS_actividad category (0 if none).
df |>
  filter(Denticion == "Permanente", !is.na(ICCMS_actividad)) |>
  count(Dte, ICCMS_actividad, name = "count") |>
  mutate(Dte = fct_relevel(Dte, !!!dte_order)) |>
  pivot_wider(names_from = ICCMS_actividad, values_from = count, values_fill = 0) |>
  arrange(Dte) |>
  knitr::kable()
```
# Nyvad
```{r}
# df %>%
# filter(Denticion == 'Primario') |>
# filter(!is.na(NYVAD)) %>%
# group_by(Dte) %>%
# summarise(
# Mean_NYVAD = mean(NYVAD, na.rm = TRUE), # Calculate the mean NYVAD score per Dte
# SD_NYVAD = sd(NYVAD, na.rm = TRUE), # Calculate the standard deviation
# Count = n() # Count the number of observations per Dte
# ) %>%
# ggplot(aes(x = Dte, y = Mean_NYVAD)) +
# geom_point(size = 3, color = "Grey 70") + # Plot the mean NYVAD as points
# geom_errorbar(aes(ymin = Mean_NYVAD - SD_NYVAD, ymax = Mean_NYVAD + SD_NYVAD), width = 0.2, color = "Grey 30") + # Add error bars for standard deviation
# labs(
# title = "Summary of NYVAD Scores per Dentición Primaria",
# x = "Tooth (Dte)",
# y = "Mean NYVAD Score"
# ) +
# theme_minimal() +
# theme(axis.text.x = element_text(angle = 45, hjust = 1)) # Rotate x-axis labels for readability
```
```{r}
# df %>%
# filter(Denticion == 'Permanente') |>
# filter(!is.na(NYVAD)) %>%
# group_by(Dte) %>%
# summarise(
# Mean_NYVAD = mean(NYVAD, na.rm = TRUE), # Calculate the mean NYVAD score per Dte
# SD_NYVAD = sd(NYVAD, na.rm = TRUE), # Calculate the standard deviation
# Count = n() # Count the number of observations per Dte
# ) %>%
# ggplot(aes(x = Dte, y = Mean_NYVAD)) +
# geom_point(size = 3, color = "Grey 70") + # Plot the mean NYVAD as points
# geom_errorbar(aes(ymin = Mean_NYVAD - SD_NYVAD, ymax = Mean_NYVAD + SD_NYVAD), width = 0.2, color = "Grey 30") + # Add error bars for standard deviation
# labs(
# title = "Summary of NYVAD Scores per Dentición Permanente",
# x = "Tooth (Dte)",
# y = "Mean NYVAD Score"
# ) +
# theme_minimal() +
# theme(axis.text.x = element_text(angle = 45, hjust = 1)) # Rotate x-axis labels for readability
```
```{r}
# 100% stacked bars: share of each NYVAD score per primary tooth.
df |>
  filter(Denticion == "Primario", !is.na(NYVAD)) |>
  # NYVAD becomes a factor so it maps onto the discrete nyvad_palette.
  count(Dte, NYVAD = factor(NYVAD), name = "count") |>
  ggplot(aes(x = Dte, y = count, fill = NYVAD)) +
  geom_col(position = "fill") +
  scale_fill_manual(values = nyvad_palette) +
  scale_y_continuous(labels = scales::percent_format()) +
  labs(
    title = "NYVAD Score Distribution per Tooth Primario (100% Stacked Bar)",
    x = "Tooth (Dte)",
    y = "Proportion",
    fill = "NYVAD Score"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
```
```{r}
# 100% stacked bars: share of each NYVAD score per permanent tooth.
df |>
  filter(Denticion == "Permanente", !is.na(NYVAD)) |>
  # NYVAD becomes a factor so it maps onto the discrete nyvad_palette.
  count(Dte, NYVAD = factor(NYVAD), name = "count") |>
  ggplot(aes(x = Dte, y = count, fill = NYVAD)) +
  geom_col(position = "fill") +
  scale_fill_manual(values = nyvad_palette) +
  scale_y_continuous(labels = scales::percent_format()) +
  labs(
    title = "NYVAD Score Distribution per Tooth Permanente (100% Stacked Bar)",
    x = "Tooth (Dte)",
    y = "Proportion",
    fill = "NYVAD Score"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
```
## Highest Nyvad per tooth
```{r}
# Jittered scatter of the highest NYVAD score per child and primary tooth;
# the score sets both the y position and the point colour.
df |>
  filter(Denticion == "Primario", !is.na(NYVAD)) |>
  group_by(ID, Dte) |>
  summarise(Highest_NYVAD = max(NYVAD, na.rm = TRUE), .groups = "drop") |>
  ggplot(
    aes(
      x = Dte,
      y = as.numeric(Highest_NYVAD),
      color = as.factor(Highest_NYVAD)
    )
  ) +
  geom_jitter(size = 3, alpha = 0.3) +
  scale_color_manual(values = nyvad_palette) +
  labs(
    title = "Highest NYVAD Score per Tooth (Primario) per Child",
    x = "Tooth (Dte)",
    y = "Highest NYVAD Score",
    color = "NYVAD Score"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
```
```{r}
# Jittered scatter of the highest NYVAD score per child and permanent tooth;
# the score sets both the y position and the point colour.
df |>
  filter(Denticion == "Permanente", !is.na(NYVAD)) |>
  group_by(ID, Dte) |>
  summarise(Highest_NYVAD = max(NYVAD, na.rm = TRUE), .groups = "drop") |>
  ggplot(
    aes(
      x = Dte,
      y = as.numeric(Highest_NYVAD),
      color = as.factor(Highest_NYVAD)
    )
  ) +
  geom_jitter(size = 3, alpha = 0.3) +
  scale_color_manual(values = nyvad_palette) +
  labs(
    title = "Highest NYVAD Score per Tooth (Permanente) per Child",
    x = "Tooth (Dte)",
    y = "Highest NYVAD Score",
    color = "NYVAD Score"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
```
## Highest Nyvad per child
```{r}
# Table with the highest NYVAD score found for each child; children with no
# NYVAD value at all do not appear.
df |>
  filter(!is.na(NYVAD)) |>
  group_by(ID) |>
  summarise(Highest_NYVAD = max(NYVAD, na.rm = TRUE)) |>
  knitr::kable()
```
```{r}
# Bar chart: number of children by their highest NYVAD score.
df %>%
  filter(!is.na(NYVAD)) %>%
  group_by(ID) %>%
  summarise(Highest_NYVAD = max(NYVAD, na.rm = TRUE), .groups = "drop") %>%
  # Declare all ten scores as factor levels ...
  ggplot(aes(x = factor(Highest_NYVAD, levels = 0:9))) +
  geom_bar() +
  # ... and keep the unused ones on the axis; by default a discrete scale
  # drops levels that have no data, so scores nobody has would disappear.
  scale_x_discrete(drop = FALSE) +
  labs(
    title = "Histogram of Highest NYVAD Score per Child",
    x = "Highest NYVAD Score",
    y = "Frequency"
  ) +
  theme_minimal()
```