library(tibble)
library(knitr)
library(dplyr)
#>
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#>
#> filter, lag
#> The following objects are masked from 'package:base':
#>
#> intersect, setdiff, setequal, union
library(forcats)
library(ggplot2)
library(modelr)
library(tidyr)
library(registryr)
library(scales)registryr provides tools for the analysis of the clinical registry datasets. These tools will be described in this vignette.
Let’s firstly load a fake dataset which comes with the package.
data(fake_data)
head(fake_data)
#> # A tibble: 6 × 9
#> patient_id site_id sex sex_cat age death_prob mean_los los dead
#> <int> <int> <int> <chr> <int> <dbl> <dbl> <dbl> <int>
#> 1 1 1 1 Male 56 0.354 45.6 43.8 1
#> 2 2 1 1 Male 49 0.326 41.4 44.0 1
#> 3 3 1 1 Male 39 0.286 35.4 30.6 0
#> 4 4 1 1 Male 49 0.326 41.4 37.3 1
#> 5 5 1 1 Male 54 0.346 44.4 40.9 0
#> 6 6 1 0 Female 54 0.316 42.4 42.2 1We will firstly create a population dataset and then use the population_pyramid function to make a population pyramid plot
population_df <-
fake_data %>%
select(-sex) %>%
rename(sex = sex_cat) %>%
mutate(age_group = cut(age, c(-Inf, seq(20, 70, 10), Inf))) %>%
count(sex, age_group)
population_df
#> # A tibble: 12 × 3
#> sex age_group n
#> <chr> <fct> <int>
#> 1 Female (20,30] 31
#> 2 Female (30,40] 1686
#> 3 Female (40,50] 9055
#> 4 Female (50,60] 7860
#> 5 Female (60,70] 1374
#> 6 Female (70, Inf] 53
#> 7 Male (20,30] 27
#> 8 Male (30,40] 1693
#> 9 Male (40,50] 8964
#> 10 Male (50,60] 7782
#> 11 Male (60,70] 1404
#> 12 Male (70, Inf] 71population_df %>%
population_pyramid()Funnel plot is a visual representation of how individual medical units (e.g. hospitals) perform compared to their peers and the overall average. It also helps to identify the outliers (i.e the ones who are performing better or worse than the average). The stat_funnelcount and stat_funnelcontinuous functions provide easy options to add funnels to an existing ggplot. We will use the average mortality rate and length of dtay as the benchmarks and will then make funnel plots using the stat_funnelcount and stat_funnelcontinuous functions.
n_sites <- fake_data %>% pull(site_id) %>% n_distinct()
benchmark_mortality <-
fake_data %>%
summarise(benchmark = sum(dead) / n()) %>%
pull(benchmark)
fake_data_grouped <-
fake_data %>%
group_by(site_id) %>%
summarise(n_deaths = sum(dead),
mean_los = mean(los),
n = n())
fake_data_grouped <-
fake_data_grouped %>%
mutate(death_rate = n_deaths / n,
site_id = as_factor(site_id)) %>%
# we need to manually change the denominator for the funnel plots, to make it look nicer :)
mutate(n = seq(100, 2000, length.out = n_sites),
sd_los = rpois(dplyr::n(), 5))
fake_data_grouped %>%
ggplot(aes(x = n, y = death_rate)) +
geom_hline(yintercept = benchmark_mortality,
col = "dark blue") +
geom_point() +
stat_funnelcount(fill = "dark blue",
alpha = 0.5,
ci_limits = 0.95)+
stat_funnelcount(fill = "light blue",
alpha = 0.5,
ci_limits = 0.998) +
geom_point() +
scale_y_continuous(expand = c(0, 0),
limits = c(0, 1),
labels = percent_format()
) +
coord_cartesian(ylim = c(0, NA),
xlim = c(0, NA)) +
theme_bw() +
labs(
x = "Number of surgeries",
y = "Mortality rate"
)
benchmark_los <-
fake_data %>%
summarise(benchmark = mean(los)) %>%
pull(benchmark)
fake_data_grouped %>%
ggplot(aes(x = n, y = mean_los, sd = sd_los)) +
geom_hline(yintercept = benchmark_los,
col = "dark blue") +
stat_funnelcontinuous(fill = "dark blue",
alpha = 0.5,
ci_limits = 0.95) +
stat_funnelcontinuous(fill = "light blue",
alpha = 0.5,
ci_limits = 0.998) +
scale_y_continuous(expand = c(0, 0),
limits = c(35, 48),
) +
geom_point() +
labs(
x = "Number of surgeries",
y = "Mean length of stay (days)"
) +
theme_bw()Let’s find out what model we should use to adjust the outcome, we can do that by using the stepwise function
model_mortality <-
stepwise(out_var = "dead",
ind_var = c("sex_cat", "age"),
data = fake_data,
model = "logistic")
#> Estimate Std. Error z value Pr(>|z|)
#> (Intercept) -0.7766145 0.01076343 -72.15307 0
#> +++ Adding age
#>
#> Estimate Std. Error z value Pr(>|z|)
#> (Intercept) -1.74689249 0.077653691 -22.49594 4.548809e-112
#> age 0.01933368 0.001527017 12.66108 9.715258e-37
#> +++ Adding sex_cat
#>
#> Estimate Std. Error z value Pr(>|z|)
#> (Intercept) -1.82350003 0.078494266 -23.230997 2.214221e-119
#> age 0.01931928 0.001527943 12.643983 1.207769e-36
#> sex_catMale 0.15294679 0.021590857 7.083868 1.401851e-12
mortality_adj_vars <- labels(terms(formula(model_mortality)))model_los <-
stepwise(out_var = "los",
ind_var = c("sex_cat", "age"),
data = fake_data,
model = "linear")
#> Estimate Std. Error t value Pr(>|t|)
#> (Intercept) 40.98356 0.03305549 1239.841 0
#> +++ Adding age
#>
#> Estimate Std. Error t value Pr(>|t|)
#> (Intercept) 11.0906523 0.181768636 61.01521 0
#> age 0.5977499 0.003598968 166.08926 0
#> +++ Adding sex_cat
#>
#> Estimate Std. Error t value Pr(>|t|)
#> (Intercept) 10.1303779 0.179959114 56.29266 0
#> age 0.5972729 0.003530606 169.17007 0
#> sex_catMale 1.9740782 0.049894253 39.56524 0
los_adj_vars <- labels(terms(formula(model_mortality)))Now, let’s risk adjust the data using the models found using the stepwise function. We can apply risk adjustment by risk_adj function.
fake_data <-
fake_data %>%
ungroup() %>%
drop_na(sex, age)
risk_adjust(data = fake_data,
site_id = "site_id",
outcome = "dead",
outcome_type = "binary",
adj_vars = mortality_adj_vars)
#> # A tibble: 20 × 3
#> site_id n_outcome n_outcome_adjusted
#> <int> <int> <dbl>
#> 1 1 642 641
#> 2 2 617 618
#> 3 3 633 634
#> 4 4 635 635
#> 5 5 597 596
#> 6 6 624 622
#> 7 7 627 626
#> 8 8 657 656
#> 9 9 593 597
#> 10 10 632 636
#> 11 11 614 614
#> 12 12 643 643
#> 13 13 651 652
#> 14 14 631 629
#> 15 15 640 640
#> 16 16 634 635
#> 17 17 638 637
#> 18 18 594 594
#> 19 19 664 661
#> 20 20 636 636
risk_adjust(data = fake_data,
site_id = "site_id",
outcome = "los",
outcome_type = "continuous",
adj_vars = los_adj_vars)
#> # A tibble: 20 × 4
#> site_id mean_outcome mean_outcome_adj sd_outcome
#> <int> <dbl> <dbl> <dbl>
#> 1 1 41.1 41.0 5.11
#> 2 2 40.9 41.0 5.03
#> 3 3 40.8 40.8 4.89
#> 4 4 40.9 40.9 4.94
#> 5 5 41.0 41.0 5.03
#> 6 6 41.3 41.1 4.99
#> 7 7 41.1 41.0 4.98
#> 8 8 41.0 40.9 5.04
#> 9 9 40.8 41.0 5.03
#> 10 10 40.8 41.0 5.09
#> 11 11 40.9 40.8 4.90
#> 12 12 41.2 41.2 4.98
#> 13 13 40.9 41.0 4.98
#> 14 14 41.1 41.0 4.98
#> 15 15 41.1 41.1 4.93
#> 16 16 41.0 41.1 4.87
#> 17 17 41.1 41.0 4.98
#> 18 18 41.1 41.0 4.98
#> 19 19 41.2 41.0 5.06
#> 20 20 40.7 40.7 5.00iris %>% outlier(n = 2)
#> Sepal.Length Sepal.Width Petal.Length Petal.Width Species
#> 1 5.4 3.9 1.7 0.4 setosa
#> 2 5.8 4.0 1.2 0.2 setosa
#> 3 5.7 4.4 1.5 0.4 setosa
#> 4 5.4 3.9 1.3 0.4 setosa
#> 5 5.2 4.1 1.5 0.1 setosa
#> 6 5.5 4.2 1.4 0.2 setosa
#> 7 5.0 2.0 3.5 1.0 versicolor
#> 8 7.9 3.8 6.4 2.0 virginica
#> outlier_Sepal.Length outlier_Sepal.Width outlier_Petal.Length
#> 1 0 1 0
#> 2 0 1 0
#> 3 0 1 0
#> 4 0 1 0
#> 5 0 1 0
#> 6 0 1 0
#> 7 0 1 0
#> 8 1 0 0
#> outlier_Petal.Width
#> 1 0
#> 2 0
#> 3 0
#> 4 0
#> 5 0
#> 6 0
#> 7 0
#> 8 0iris %>% outlier(n = 3)
#> Sepal.Length Sepal.Width Petal.Length Petal.Width Species
#> 1 5.7 4.4 1.5 0.4 setosa
#> outlier_Sepal.Length outlier_Sepal.Width outlier_Petal.Length
#> 1 0 1 0
#> outlier_Petal.Width
#> 1 0