---
title: "02 Radiographic evaluation final"
author: "Sergio Uribe"
date-modified: last-modified
format:
html:
toc: true
toc-expand: 3
code-fold: true
code-tools: true
editor: visual
execute:
echo: false
cache: true
warning: false
message: false
---
# Packages
```{r}
pacman::p_load(tidyverse, # tools for data science
visdat, #NAs
janitor, # for data cleaning and tables
here, # for reproducible research
gtsummary, # for tables
countrycode, # to normalize country data
easystats, # check https://easystats.github.io/easystats/
scales,
irr, # for agreement
skimr,
lubridate,
# yardstick, # for the dataframe issue with kable
tidymodels # for the modelling
)
# FOR EDA try DataExplorer, SmartEDA o dllokr, check https://www.youtube.com/watch?v=sKrWYE63Vk4&t=7s
```
```{r}
#| output: false
theme_set(theme_minimal())
```
## Radiography evaluation
```{r}
#| output: false
radiographic <- read_csv("https://docs.google.com/spreadsheets/d/e/2PACX-1vTCbgTOL0YgSx4EcW1chWabNEGqNMPIPwClKKIYpIJx5Z1XHi9jh_E9CtKAnlL5NfopYGb6TlZz0v_d/pub?gid=816313372&single=true&output=csv") |>
rename("Radiograph" = "Radiograph #")
```
## Key
```{r}
#| output: false
key <- read_csv("https://docs.google.com/spreadsheets/d/e/2PACX-1vTCbgTOL0YgSx4EcW1chWabNEGqNMPIPwClKKIYpIJx5Z1XHi9jh_E9CtKAnlL5NfopYGb6TlZz0v_d/pub?gid=1775223551&single=true&output=csv")
```
## Join
```{r}
radiographic <- radiographic |>
left_join(key, by = "Radiograph" ) |>
relocate("Group", .after = "Evaluator")
```
```{r}
# housekeeping
rm(key)
```
```{r}
#| output: false
glimpse(radiographic)
```
```{r}
radiographic <- radiographic |>
select(-`Comments (optional)`) |>
filter(Evaluator == "Neimane") |>
mutate(
`Root development` = fct_recode(`Root development`,
"Yes" = "+",
"No" = "-"),
`Root development` = fct_relevel(`Root development`,
"Yes", "No change", "No")
)
```
# EDA
```{r}
skimr::skim(radiographic)
```
```{r}
# radiographic |>
# group_by(Group) |>
# DataExplorer::create_report( output_dir = here("eda_reports"),
# output_file = "eda_radiographic.html",
# report_title = "Data Profiling Radiographic Report")
```
## Prepare the data
```{r}
radiographic <- radiographic |>
mutate(crown_change = `Crown FINAL (in pixels)` - `Crown BASELINE (in pixels)`,
root_change = `Root FINAL (in pixels)` - `Root BASELINE (in pixels)`)
```
Change any other periapical status to NA
```{r}
radiographic <- radiographic %>%
mutate(`Periapical status` = if_else(str_detect(`Periapical status`, "\\d"), `Periapical status`, NA_character_))
```
Change some levels
```{r}
radiographic <- radiographic |>
mutate(`External Root Resorption` = fct_relevel(`External Root Resorption`,
"No", #comparison group
"Yes"),
`Root development` = fct_relevel(`Root development`,
"Yes", #comparison group
"No change",
"No"),
Obliteration = fct_relevel(Obliteration,
"No", #comparison group
"Yes"))
```
# Radiological changes
## Crown
```{r}
radiographic |>
ggplot(aes(x = crown_change)) +
geom_histogram(bins = 8) +
facet_grid(Group ~.) +
labs(title = "Change in Crown")
```
```{r}
radiographic |>
select(Group, crown_change ) |>
gtsummary::tbl_summary(by = Group) |>
add_p()
```
## Root
```{r}
radiographic |>
ggplot(aes(x = root_change)) +
geom_histogram(bins = 8) +
facet_grid(Group ~.) +
labs(title = "Change in Root")
```
```{r}
radiographic |>
select(Group, root_change) |>
gtsummary::tbl_summary(by = Group) |>
add_p()
```
## Periapical Status
```{r}
radiographic |>
select(Group, `Periapical status`) |>
gtsummary::tbl_summary(by = Group) |>
add_p()
```
```{r}
radiographic |>
with(mosaicplot(table(Group, `Periapical status`), shade = T))
```
## Root development
```{r}
radiographic |>
select(Group, `Root development`) |>
gtsummary::tbl_summary(by = Group) |>
add_p()
```
```{r}
radiographic |>
with(mosaicplot(table(Group, `Root development`), shade = T))
```
## External root resorption
```{r}
radiographic |>
select(Group, `External Root Resorption`) |>
gtsummary::tbl_summary(by = Group) |>
add_p()
```
```{r}
radiographic |>
with(mosaicplot(table(Group, `External Root Resorption`), shade = T))
```
# Modelling
```{r}
# Prepare the data for modeling 1
radiographic <- radiographic %>%
mutate(Group = factor(Group))
# Prepare the data for modeling 2
radiographic <- radiographic %>%
mutate(Periapical_status = if_else(str_detect(`Periapical status`, "\\d"), `Periapical status`, NA_character_),
Obliteration = as.factor(Obliteration),
Root_development = as.factor(`Root development`),
External_Root_Resorption = as.factor(`External Root Resorption`)) %>%
select(Group, Obliteration, Periapical_status, Root_development, External_Root_Resorption, crown_change, root_change)
# Remove rows with NAs in Periapical_status
radiographic <- radiographic %>%
drop_na(Periapical_status)
# Split the data into training and testing sets
set.seed(123)
data_split <- initial_split(radiographic, prop = 0.8, strata = Group)
train_data <- training(data_split)
test_data <- testing(data_split)
# Define the recipe
rec <- recipe(Group ~ Obliteration + Periapical_status + Root_development + External_Root_Resorption + crown_change + root_change, data = train_data) %>%
step_dummy(all_nominal_predictors(), -all_outcomes()) %>%
step_normalize(all_numeric_predictors())
# Define the model
log_reg_model <- logistic_reg() %>%
set_engine("glm")
# Create the workflow
wf <- workflow() %>%
add_recipe(rec) %>%
add_model(log_reg_model)
# Train the model
log_reg_fit <- wf %>%
fit(data = train_data)
# Make predictions on the test set
test_predictions <- log_reg_fit %>%
predict(test_data) %>%
bind_cols(test_data)
# Evaluate the model
conf_mat <- conf_mat(test_predictions, truth = Group, estimate = .pred_class)
accuracy <- accuracy(test_predictions, truth = Group, estimate = .pred_class)
```
### Confusion matrix
```{r}
# Print the results
print(conf_mat) %>%
# as.data.frame() %>%
knitr::kable()
```
### Accuracy
```{r}
accuracy |>
knitr::kable()
```
```{r}
glm_model <- glm(Group ~ Obliteration + Periapical_status + Root_development + External_Root_Resorption,
data = train_data,
family = binomial)
```
## Summary model table
```{r}
glm_model |> gtsummary::tbl_regression()
```
So, overall, no differences in radiological features between the groups