Introduction

Population pyramid plot

We will firstly create a population dataset and then use the population_pyramid function to make a population pyramid plot

population_df <-
  fake_data %>% 
  select(-sex) %>% 
  rename(sex = sex_cat) %>%
  mutate(age_group = cut(age, c(-Inf, seq(20, 70, 10), Inf))) %>%
  count(sex, age_group) 

population_df
#> # A tibble: 12 × 3
#>    sex    age_group     n
#>    <chr>  <fct>     <int>
#>  1 Female (20,30]      31
#>  2 Female (30,40]    1686
#>  3 Female (40,50]    9055
#>  4 Female (50,60]    7860
#>  5 Female (60,70]    1374
#>  6 Female (70, Inf]    53
#>  7 Male   (20,30]      27
#>  8 Male   (30,40]    1693
#>  9 Male   (40,50]    8964
#> 10 Male   (50,60]    7782
#> 11 Male   (60,70]    1404
#> 12 Male   (70, Inf]    71

population_df %>%
  population_pyramid()

Funnel plots

Funnel plot is a visual representation of how individual medical units (e.g. hospitals) perform compared to their peers and the overall average. It also helps to identify the outliers (i.e the ones who are performing better or worse than the average). The stat_funnelcount and stat_funnelcontinuous functions provide easy options to add funnels to an existing ggplot. We will use the average mortality rate and length of dtay as the benchmarks and will then make funnel plots using the stat_funnelcount and stat_funnelcontinuous functions.

n_sites <- fake_data %>% pull(site_id) %>% n_distinct()

benchmark_mortality <-
  fake_data %>%
  summarise(benchmark = sum(dead) / n()) %>%
  pull(benchmark)

fake_data_grouped <-
  fake_data %>%
  group_by(site_id) %>%
  summarise(n_deaths = sum(dead),
            mean_los = mean(los),
            n = n())

fake_data_grouped <-
  fake_data_grouped %>%
  mutate(death_rate = n_deaths / n,
         site_id = as_factor(site_id)) %>%
  # we need to manually change the denominator for the funnel plots, to make it look nicer :)
  mutate(n = seq(100, 2000, length.out = n_sites),
         sd_los = rpois(dplyr::n(), 5))

fake_data_grouped %>%
  ggplot(aes(x = n, y = death_rate)) +
  geom_hline(yintercept = benchmark_mortality,
             col = "dark blue") +
  geom_point() +
  stat_funnelcount(fill = "dark blue",
              alpha = 0.5,
              ci_limits = 0.95)+
  stat_funnelcount(fill = "light blue",
              alpha = 0.5,
              ci_limits = 0.998) +
  geom_point() +
  scale_y_continuous(expand = c(0, 0),
                     limits = c(0, 1),
                     labels = percent_format()
                     ) +
  coord_cartesian(ylim = c(0, NA),
                  xlim = c(0, NA)) +
  theme_bw() +
  labs(
    x = "Number of surgeries",
    y = "Mortality rate"
  )


benchmark_los <-
  fake_data %>%
  summarise(benchmark = mean(los)) %>%
  pull(benchmark)

fake_data_grouped %>%
  ggplot(aes(x = n, y = mean_los, sd = sd_los)) +
  geom_hline(yintercept = benchmark_los,
             col = "dark blue") +
  stat_funnelcontinuous(fill = "dark blue",
              alpha = 0.5,
              ci_limits = 0.95) +
  stat_funnelcontinuous(fill = "light blue",
              alpha = 0.5,
              ci_limits = 0.998) +
  scale_y_continuous(expand = c(0, 0),
                     limits = c(35, 48),
                     ) +
  geom_point() +
  labs(
    x = "Number of surgeries",
    y = "Mean length of stay (days)"
  ) +
  theme_bw()

Stepwise model selection

Let’s find out what model we should use to adjust the outcome, we can do that by using the stepwise function

model_mortality <-
  stepwise(out_var = "dead",
         ind_var = c("sex_cat", "age"),
         data = fake_data,
         model = "logistic")
#>               Estimate Std. Error   z value Pr(>|z|)
#> (Intercept) -0.7766145 0.01076343 -72.15307        0
#> +++ Adding age 
#> 
#>                Estimate  Std. Error   z value      Pr(>|z|)
#> (Intercept) -1.74689249 0.077653691 -22.49594 4.548809e-112
#> age          0.01933368 0.001527017  12.66108  9.715258e-37
#> +++ Adding sex_cat 
#> 
#>                Estimate  Std. Error    z value      Pr(>|z|)
#> (Intercept) -1.82350003 0.078494266 -23.230997 2.214221e-119
#> age          0.01931928 0.001527943  12.643983  1.207769e-36
#> sex_catMale  0.15294679 0.021590857   7.083868  1.401851e-12

mortality_adj_vars <- labels(terms(formula(model_mortality)))

model_los <-
  stepwise(out_var = "los",
         ind_var = c("sex_cat", "age"),
         data = fake_data,
         model = "linear")
#>             Estimate Std. Error  t value Pr(>|t|)
#> (Intercept) 40.98356 0.03305549 1239.841        0
#> +++ Adding age 
#> 
#>               Estimate  Std. Error   t value Pr(>|t|)
#> (Intercept) 11.0906523 0.181768636  61.01521        0
#> age          0.5977499 0.003598968 166.08926        0
#> +++ Adding sex_cat 
#> 
#>               Estimate  Std. Error   t value Pr(>|t|)
#> (Intercept) 10.1303779 0.179959114  56.29266        0
#> age          0.5972729 0.003530606 169.17007        0
#> sex_catMale  1.9740782 0.049894253  39.56524        0

los_adj_vars <- labels(terms(formula(model_mortality)))

Now, let’s risk adjust the data using the models found using the stepwise function. We can apply risk adjustment by risk_adj function.

fake_data <- 
  fake_data %>%
  ungroup() %>%
  drop_na(sex, age)

risk_adjust(data = fake_data, 
            site_id = "site_id",
            outcome = "dead",
            outcome_type = "binary",
            adj_vars = mortality_adj_vars)
#> # A tibble: 20 × 3
#>    site_id n_outcome n_outcome_adjusted
#>      <int>     <int>              <dbl>
#>  1       1       642                641
#>  2       2       617                618
#>  3       3       633                634
#>  4       4       635                635
#>  5       5       597                596
#>  6       6       624                622
#>  7       7       627                626
#>  8       8       657                656
#>  9       9       593                597
#> 10      10       632                636
#> 11      11       614                614
#> 12      12       643                643
#> 13      13       651                652
#> 14      14       631                629
#> 15      15       640                640
#> 16      16       634                635
#> 17      17       638                637
#> 18      18       594                594
#> 19      19       664                661
#> 20      20       636                636

risk_adjust(data = fake_data, 
            site_id = "site_id",
            outcome = "los",
            outcome_type = "continuous",
            adj_vars = los_adj_vars)
#> # A tibble: 20 × 4
#>    site_id mean_outcome mean_outcome_adj sd_outcome
#>      <int>        <dbl>            <dbl>      <dbl>
#>  1       1         41.1             41.0       5.11
#>  2       2         40.9             41.0       5.03
#>  3       3         40.8             40.8       4.89
#>  4       4         40.9             40.9       4.94
#>  5       5         41.0             41.0       5.03
#>  6       6         41.3             41.1       4.99
#>  7       7         41.1             41.0       4.98
#>  8       8         41.0             40.9       5.04
#>  9       9         40.8             41.0       5.03
#> 10      10         40.8             41.0       5.09
#> 11      11         40.9             40.8       4.90
#> 12      12         41.2             41.2       4.98
#> 13      13         40.9             41.0       4.98
#> 14      14         41.1             41.0       4.98
#> 15      15         41.1             41.1       4.93
#> 16      16         41.0             41.1       4.87
#> 17      17         41.1             41.0       4.98
#> 18      18         41.1             41.0       4.98
#> 19      19         41.2             41.0       5.06
#> 20      20         40.7             40.7       5.00

Outlier detection

iris %>% outlier(n = 2)
#>   Sepal.Length Sepal.Width Petal.Length Petal.Width    Species
#> 1          5.4         3.9          1.7         0.4     setosa
#> 2          5.8         4.0          1.2         0.2     setosa
#> 3          5.7         4.4          1.5         0.4     setosa
#> 4          5.4         3.9          1.3         0.4     setosa
#> 5          5.2         4.1          1.5         0.1     setosa
#> 6          5.5         4.2          1.4         0.2     setosa
#> 7          5.0         2.0          3.5         1.0 versicolor
#> 8          7.9         3.8          6.4         2.0  virginica
#>   outlier_Sepal.Length outlier_Sepal.Width outlier_Petal.Length
#> 1                    0                   1                    0
#> 2                    0                   1                    0
#> 3                    0                   1                    0
#> 4                    0                   1                    0
#> 5                    0                   1                    0
#> 6                    0                   1                    0
#> 7                    0                   1                    0
#> 8                    1                   0                    0
#>   outlier_Petal.Width
#> 1                   0
#> 2                   0
#> 3                   0
#> 4                   0
#> 5                   0
#> 6                   0
#> 7                   0
#> 8                   0

iris %>% outlier(n = 3)
#>   Sepal.Length Sepal.Width Petal.Length Petal.Width Species
#> 1          5.7         4.4          1.5         0.4  setosa
#>   outlier_Sepal.Length outlier_Sepal.Width outlier_Petal.Length
#> 1                    0                   1                    0
#>   outlier_Petal.Width
#> 1                   0

Introduction

Farhad Salimi

29 December, 2021

Overview

Load data

Population pyramid plot

Funnel plots

Stepwise model selection

Outlier detection