---
title: "Analysis_NVD_2022_pulp_treatment_IAPD"
author: "SU"
date: 2024-05-29
date-modified: last-modified
language:
  title-block-published: "CREATED"
  title-block-modified: "UPDATED"
format:
  html:
    toc: true
    toc-expand: 4
    code-fold: true
    code-tools: true
editor: visual
execute:
  echo: false
  cache: true
  warning: false
  message: false
---
# Analysis_NVD_2022_pulp_treatment_IAPD
# Packages
```{r}
# Load required libraries with pacman; installs them if not already installed
pacman::p_load(tidyverse, # tools for data science
visdat, #NAs
janitor, # for data cleaning and tables
here, # for reproducible research
gtsummary, # for tables
scales,
lubridate
)
# For EDA, try DataExplorer, SmartEDA or dlookr; see https://www.youtube.com/watch?v=sKrWYE63Vk4&t=7s
```
```{r}
# Make the minimal ggplot2 theme the default for every plot in this document
ggplot2::theme_set(ggplot2::theme_minimal())
```
# Dataset
```{r}
# Read the source extract; the path is built with here() from the
# analysis/data folder
df <- here("analysis", "data", "MAVITO_ZPN_MAN_UD1_B.rds") |>
  readRDS()
```
Keep only the records of "Zobārsts" and "Bērnu zobārsts"
```{r}
# Keep only rows whose SPEC_KODS_FULL is one of the two dentist labels
df <- filter(
  df,
  SPEC_KODS_FULL %in% c("Zobārsts", "Bērnu zobārsts")
)
```
Create a new variable `year` from MAN_DAT
```{r}
# Add `year`, extracted from MAN_DAT
df <- mutate(df, year = lubridate::year(MAN_DAT))
```
Remove unused columns
```{r}
# Drop three columns; none of them is referenced by the chunks below
df <- select(df, !c(PAC_ATVK, PAC_ATVK_NOS, SPEC_KODS_FULL))
```
Create a unique ID for each visit: the variable PAC_MAN_COMB
```{r}
# Visit key: PAC_ID and MAN_DAT joined with "_".
# Rows sharing both values receive the same key.
df <- mutate(df, PAC_MAN_COMB = str_c(PAC_ID, MAN_DAT, sep = "_"))
```
# Analysis
## How many unique patients?
```{r}
# Number of distinct PAC_ID values, rendered as a one-cell table
df |>
  pull(PAC_ID) |>
  n_distinct() |>
  knitr::kable(caption = "Unique patients")
```
How many visits for the codes 70301, 70302, 70303, 70304, 70305?
```{r}
# Count the distinct visit keys (PAC_MAN_COMB) that have at least one row
# with one of the listed MP_CODE values.
# Uses the native pipe throughout, like the rest of the document.
df |>
  filter(MP_CODE %in% c(70301, 70302, 70303, 70304, 70305)) |>
  summarise(distinct_visits = n_distinct(PAC_MAN_COMB)) |>
  knitr::kable(caption = "Unique visits for the codes 70301, 70302, 70303, 70304, 70305")
```
## How many pulpar treatments by age?
#### Codes 70301, 70302, 70303, 70304, 70305
##### For Zobārsts
```{r}
# AGE x MP_CODE cross-tabulation of the pulpar-treatment codes for rows with
# SPEC_KODS == "Zobārsts", with row and column totals.
# tabyl() counts rows, so a visit with several matching rows contributes
# several counts. No group_by() is needed: tabyl() ignores grouping.
# NOTE(review): SPEC_KODS_FULL (used for the initial filter) was dropped
# earlier; confirm that SPEC_KODS carries the same labels, otherwise this
# table is empty.
df |>
  filter(
    MP_CODE %in% c(70301, 70302, 70303, 70304, 70305), # pulpar treatments
    SPEC_KODS == "Zobārsts"
  ) |>
  tabyl(AGE, MP_CODE) |>
  adorn_totals(c("row", "col")) |>
  knitr::kable(caption = "Pulpar treatments by Age - Zobārsts")
```
##### For Bērnu zobārsts
```{r}
# AGE x MP_CODE cross-tabulation of the pulpar-treatment codes for rows with
# SPEC_KODS == "Bērnu zobārsts", with row and column totals.
# tabyl() counts rows and ignores grouping, so no group_by() is needed.
# NOTE(review): SPEC_KODS_FULL (used for the initial filter) was dropped
# earlier; confirm that SPEC_KODS carries the same labels, otherwise this
# table is empty.
df |>
  filter(
    MP_CODE %in% c(70301, 70302, 70303, 70304, 70305), # pulpar treatments
    SPEC_KODS == "Bērnu zobārsts" # Bērnu zobārsts only
  ) |>
  tabyl(AGE, MP_CODE) |>
  adorn_totals(c("row", "col")) |>
  knitr::kable(caption = "Pulpar treatments by Age - Bērnu zobārsts")
```
##### By Age
```{r}
# AGE x MP_CODE cross-tabulation of the pulpar-treatment codes (all
# specialists), with row and column totals.
# tabyl() counts rows, so a visit with several matching rows contributes
# several counts. No group_by() is needed: tabyl() ignores grouping.
df |>
  filter(MP_CODE %in% c(70301, 70302, 70303, 70304, 70305)) |> # pulpar treatments
  tabyl(AGE, MP_CODE) |>
  adorn_totals(c("row", "col")) |>
  knitr::kable(caption = "Pulpar treatments by Age")
```
Why do they differ? Because a child can have multiple pulp treatments and may have a birthday between visits.
```{r}
# Stacked bar chart: number of rows per AGE, coloured by MP_CODE.
# tabyl() builds the AGE x MP_CODE count table (one column per code) and
# ignores grouping, so no group_by() is needed; pivot_longer() then turns
# the code columns into a discrete MP_CODE variable for the fill.
df |>
  filter(MP_CODE %in% c(70301, 70302, 70303, 70304, 70305)) |> # pulpar treatments
  tabyl(AGE, MP_CODE) |>
  pivot_longer(-AGE, names_to = "MP_CODE") |>
  ggplot(aes(x = AGE, y = value, fill = MP_CODE)) +
  geom_col() +
  # scale_y_log10() +
  scale_fill_viridis_d() +
  theme_minimal() +
  scale_y_continuous(labels = scales::comma) +
  labs(title = "Pulpar treatments by Age", x = "Age", y = "Count")
```
```{r}
# Same data as the count chart, drawn as a 100% stacked bar chart:
# position = "fill" rescales each AGE bar to 1, so the y axis shows the
# share of each MP_CODE within an age.
# tabyl() ignores grouping, so no group_by() is needed before it.
df |>
  filter(MP_CODE %in% c(70301, 70302, 70303, 70304, 70305)) |> # pulpar treatments
  tabyl(AGE, MP_CODE) |>
  pivot_longer(-AGE, names_to = "MP_CODE") |>
  ggplot(aes(x = AGE, y = value, fill = MP_CODE)) +
  geom_col(position = "fill") +
  # scale_y_log10() +
  scale_fill_viridis_d() +
  theme_minimal() +
  scale_y_continuous(labels = scales::label_percent()) +
  labs(title = "Pulpar treatments by Age (Stacked 100%)", x = "Age", y = "%")
```
##### By Specialist
```{r}
# SPEC_KODS x MP_CODE cross-tabulation for rows with 0 < AGE < 13, with row
# and column totals. tabyl() counts rows and ignores grouping, so no
# group_by() is needed.
# NOTE(review): 70001 is included here but in no other pulpar-treatment
# chunk; confirm it is intended (e.g. as a reference code) or remove it.
df |>
  filter(
    AGE > 0, AGE < 13,
    MP_CODE %in% c(70301, 70302, 70303, 70304, 70305, 70001)
  ) |>
  tabyl(SPEC_KODS, MP_CODE) |>
  adorn_totals(c("row", "col")) |>
  knitr::kable(caption = "Pulpar treatments by SPEC_KODS, children >0 & <13")
```
#### Codes 70400, 70401, 70402
##### By Age
```{r}
# AGE x MP_CODE cross-tabulation for codes 70400, 70401, 70402, with row and
# column totals. tabyl() counts rows and ignores grouping, so no group_by()
# is needed.
df |>
  filter(MP_CODE %in% c(70400, 70401, 70402)) |> # other codes
  tabyl(AGE, MP_CODE) |>
  adorn_totals(c("row", "col")) |>
  knitr::kable(caption = "Codes 70400, 70401, 70402")
```
```{r}
# Stacked bar chart: number of rows per AGE for codes 70400, 70401, 70402,
# coloured by MP_CODE.
# tabyl() builds the AGE x MP_CODE count table and ignores grouping, so no
# group_by() is needed; pivot_longer() turns the code columns into a
# discrete MP_CODE variable for the fill.
df |>
  filter(MP_CODE %in% c(70400, 70401, 70402)) |>
  tabyl(AGE, MP_CODE) |>
  pivot_longer(-AGE, names_to = "MP_CODE") |>
  ggplot(aes(x = AGE, y = value, fill = MP_CODE)) +
  geom_col() +
  # scale_y_log10() +
  scale_fill_viridis_d() +
  theme_minimal() +
  scale_y_continuous(labels = scales::comma) +
  labs(title = "70400, 70401, 70402 by Age", x = "Age", y = "Count")
```
```{r}
# 100% stacked bar chart for codes 70400, 70401, 70402: position = "fill"
# rescales each AGE bar to 1, so the y axis shows the share of each MP_CODE
# within an age.
# tabyl() ignores grouping, so no group_by() is needed before it.
df |>
  filter(MP_CODE %in% c(70400, 70401, 70402)) |>
  tabyl(AGE, MP_CODE) |>
  pivot_longer(-AGE, names_to = "MP_CODE") |>
  ggplot(aes(x = AGE, y = value, fill = MP_CODE)) +
  geom_col(position = "fill") +
  # scale_y_log10() +
  scale_fill_viridis_d() +
  theme_minimal() +
  scale_y_continuous(labels = scales::label_percent()) +
  labs(title = "70400, 70401, 70402 by Age (Stacked 100%)", x = "Age", y = "")
```
##### By Specialist
```{r}
# SPEC_KODS x MP_CODE cross-tabulation for codes 70400, 70401, 70402 and
# rows with 0 < AGE < 13, with row and column totals.
# tabyl() counts rows and ignores grouping, so no group_by() is needed.
df |>
  filter(
    AGE > 0, AGE < 13,
    MP_CODE %in% c(70400, 70401, 70402) # other codes
  ) |>
  tabyl(SPEC_KODS, MP_CODE) |>
  adorn_totals(c("row", "col")) |>
  knitr::kable(caption = "Codes 70400, 70401, 70402 by SPEC_KODS, children >0 & <13")
```
## 0. Table 1
## 1. Table 2: How many visits had 1, 2, 3 or more pulp treatments?
```{r}
# Distribution of pulpar-treatment rows per visit.
# Step 1: count matching rows per visit key; `visit_count` is the number of
#         matching rows recorded in that visit.
# Step 2: count how many visits share each `visit_count` value (column `n`).
# count() returns an ungrouped result, so nothing is left grouped.
df |>
  filter(MP_CODE %in% c(70301, 70302, 70303, 70304, 70305)) |>
  count(PAC_MAN_COMB, name = "visit_count") |>
  count(visit_count) |>
  knitr::kable(caption = "MP_CODE 70301, 70302, 70303, 70304, 70305")
```
## 2. Frequency by visits MP_CODE 70400, 70401, 70402
```{r}
# Distribution of rows with codes 70400, 70401, 70402 per visit.
# Step 1: count matching rows per visit key; `visit_count` is the number of
#         matching rows recorded in that visit.
# Step 2: count how many visits share each `visit_count` value (column `n`).
# count() returns an ungrouped result, so nothing is left grouped.
df |>
  filter(MP_CODE %in% c(70400, 70401, 70402)) |>
  count(PAC_MAN_COMB, name = "visit_count") |>
  count(visit_count) |>
  knitr::kable(caption = "MP_CODE 70400, 70401, 70402")
```
##