Getting the normalized competitive index

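The normalized competitive index (NCI) is obtained by dividing each specialty's competitive index (CI) for a given year by the CI computed across all specialties in that same year (\(CI_{total}\)):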

\[ CI = \frac{\frac{n_{prog}-n_{unfillprog}}{n_{prog}}}{\frac{n_{matchall}}{n_{appsall}}} \] \[ NCI = \frac{CI}{CI_{total}} \]

# Add three columns to df:
#   tot_ci - competitive index pooled over every row sharing the same year
#            (fraction of programs filled divided by the overall match rate)
#   ci     - the same ratio computed from each row's own counts
#   nci    - ci divided by that year's tot_ci
# ci is purely row-wise, so computing it inside the grouped mutate gives the
# same values as computing it after ungrouping.
df <- df %>%
  dplyr::group_by(year) %>%
  dplyr::mutate(
    tot_ci = ((sum(n_prog) - sum(unfill_prog)) / sum(n_prog)) /
      (sum(n_match_all) / sum(n_apps_all)),
    ci = ((n_prog - unfill_prog) / n_prog) / (n_match_all / n_apps_all),
    nci = ci / tot_ci
  ) %>%
  dplyr::ungroup()

Graphing NCI trends

The NCI trends per specialty can be graphed across the years:

# One panel per specialty: NCI plotted against year as points, with a
# straight-line (lm) fit and no confidence ribbon. The dashed red
# horizontal line marks NCI = 1.
ggplot(data = df, mapping = aes(x = year, y = nci)) +
  geom_point() +
  geom_hline(yintercept = 1, linetype = "dashed", color = "red") +
  geom_smooth(method = "lm", formula = y ~ x, se = FALSE) +
  facet_wrap(~specialty) +
  scale_x_continuous(breaks = seq(2008, 2020, 1)) +
  labs(
    title = "Normalized Competitive Index Trends by Specialty",
    x = "Year",
    y = "Normalized Competitive Index"
  ) +
  # theme_bw() must come before theme() so the tweaks below are not reset.
  theme_bw() +
  theme(
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1),
    panel.grid.minor = element_blank()
  )

Table 1

Pediatric Subspecialty Applicant and Match Characteristics, 2008 to 2020

# Table 1: pool the raw counts over all rows of each specialty first, then
# derive the ratios from the pooled totals. Every metric is a percentage
# except ranks_per_app, which is a plain ratio. A missing count in any row
# of a specialty makes the affected pooled metric NA (no na.rm is applied).
df %>%
  dplyr::group_by(specialty) %>%
  dplyr::summarise(
    apps_total = sum(n_apps_all),
    matched_total = sum(n_match_all),
    prog_total = sum(n_prog),
    unfilled_total = sum(unfill_prog),
    us_apps_total = sum(n_apps_us),
    us_matched_total = sum(n_match_us),
    us_ranks_total = sum(rank_us),
    first_total = sum(first),
    top_three_total = sum(first, second, third)
  ) %>%
  dplyr::mutate(
    match_rate = matched_total / apps_total * 100,
    filled_prog = (prog_total - unfilled_total) / prog_total * 100,
    ranks_per_app = us_ranks_total / us_apps_total,
    us_match_rate = us_matched_total / us_apps_total * 100,
    first_choice_match = first_total / matched_total * 100,
    first_choice_apps = first_total / apps_total * 100,
    top_three_match = top_three_total / matched_total * 100,
    top_three_apps = top_three_total / apps_total * 100
  ) %>%
  # Drop the pooled helper totals and keep the published column order.
  dplyr::select(
    specialty,
    match_rate, filled_prog, ranks_per_app, us_match_rate,
    first_choice_match, first_choice_apps,
    top_three_match, top_three_apps
  ) %>%
  dplyr::arrange(specialty) %>%
  knitr::kable()

specialty	match_rate	filled_prog	ranks_per_app	us_match_rate	first_choice_match	first_choice_apps	top_three_match	top_three_apps
Academic General Pediatrics	81.8	43.3	1.61	83.3	94.4	77.3	NA	NA
Adolescent Medicine	86.6	69.3	5.32	88.6	63.4	54.9	88.7	76.8
Cardiology	76.2	91.7	6.45	85.2	NA	NA	NA	NA
Child Abuse	81.8	52.0	3.65	83.8	82.7	67.7	NA	NA
Critical Care Medicine	88.5	80.7	5.88	91.1	NA	NA	NA	NA
Developmental and Behavioral Pediatrics	85.9	61.0	3.87	88.5	71.6	61.6	NA	NA
Emergency Medicine	77.8	93.7	6.28	83.8	NA	NA	NA	NA
Endocrinology	93.6	56.7	6.14	97.9	70.2	65.7	93.2	87.2
Gastroenterology	78.2	89.7	6.49	88.8	NA	NA	NA	NA
Hematology/Oncology	87.3	83.0	6.13	92.5	NA	NA	NA	NA
Hospital Medicine	71.4	90.0	4.43	73.9	60.4	43.1	NA	NA
Infectious Diseases	91.3	52.4	5.37	96.3	74.2	67.8	NA	NA
Neonatal-Perinatal Medicine	91.1	78.0	5.83	94.0	59.2	53.9	87.0	79.3
Nephrology	92.4	40.9	4.71	98.0	73.0	67.5	NA	NA
Pulmonology	91.9	51.6	5.08	96.3	68.0	62.5	93.0	85.5
Rheumatology	85.0	55.9	4.44	93.1	NA	NA	NA	NA
Sports Medicine	30.0	92.9	2.39	31.4	NA	NA	NA	NA
Transplant Hepatology	100.0	71.4	1.86	100.0	100.0	100.0	NA	NA

Table 2

Comparison of Match Rates, Percent Filled Programs, and NCI between Specialties, 2008 to 2020

# Upper bound of the normal-approximation (Wald) confidence interval for a
# proportion.
#
# p: proportion(s) on the 0-1 scale.
# n: number of observations behind each proportion.
# z: critical value of the standard normal distribution; the default 1.96
#    gives an approximate 95% two-sided interval.
#
# Returns p + z * sqrt(p * (1 - p) / n), capped at 1 because a proportion
# cannot exceed 1 (the uncapped Wald bound overshoots when p is near 1 or
# n is small). Vectorised over p and n; NA inputs yield NA.
ci_upper <- function(p, n, z = 1.96) {
  pmin(p + (z * sqrt((p * (1 - p)) / n)), 1)
}
# Lower bound of the normal-approximation (Wald) confidence interval for a
# proportion.
#
# p: proportion(s) on the 0-1 scale.
# n: number of observations behind each proportion.
# z: critical value of the standard normal distribution; the default 1.96
#    gives an approximate 95% two-sided interval.
#
# Returns p - z * sqrt(p * (1 - p) / n), floored at 0 because a proportion
# cannot be negative (the unfloored Wald bound undershoots when p is near 0
# or n is small). Vectorised over p and n; NA inputs yield NA.
ci_lower <- function(p, n, z = 1.96) {
  pmax(p - (z * sqrt((p * (1 - p)) / n)), 0)
}

# Upper limit of the interval around the mean of x: the mean plus 1.96
# standard errors, where the standard error is sd(x) / sqrt(length(x)).
# The result is NA whenever sd(x) is NA (for example a single observation).
ci_mean_upper <- function(x) {
  std_err <- sd(x) / sqrt(length(x))
  mean(x) + (1.96 * std_err)
}

# Lower limit of the interval around the mean of x: the mean minus 1.96
# standard errors, where the standard error is sd(x) / sqrt(length(x)).
# The result is NA whenever sd(x) is NA (for example a single observation).
ci_mean_lower <- function(x) {
  std_err <- sd(x) / sqrt(length(x))
  mean(x) - (1.96 * std_err)
}

# Table 2: per-specialty pooled match rate and program fill rate, each with
# bounds from ci_lower()/ci_upper() (sample size = pooled applicants and
# pooled programs respectively), plus the mean NCI with bounds from
# ci_mean_lower()/ci_mean_upper(). Rates are reported as percentages.
df %>%
  dplyr::group_by(specialty) %>%
  dplyr::summarise(
    apps_total = sum(n_apps_all),
    prog_total = sum(n_prog),
    p_match = sum(n_match_all) / apps_total,
    p_filled = (prog_total - sum(unfill_prog)) / prog_total,
    nci_mean = mean(nci),
    nci_up = ci_mean_upper(nci),
    nci_low = ci_mean_lower(nci)
  ) %>%
  dplyr::mutate(
    match_rate = p_match * 100,
    match_rate_up = ci_upper(p_match, apps_total) * 100,
    match_rate_low = ci_lower(p_match, apps_total) * 100,
    filled_prog = p_filled * 100,
    filled_prog_up = ci_upper(p_filled, prog_total) * 100,
    filled_prog_low = ci_lower(p_filled, prog_total) * 100
  ) %>%
  # Drop the pooled helper columns and keep the published column order.
  dplyr::select(
    specialty,
    match_rate, match_rate_up, match_rate_low,
    filled_prog, filled_prog_up, filled_prog_low,
    nci_mean, nci_up, nci_low
  ) %>%
  dplyr::arrange(specialty) %>%
  knitr::kable()

specialty	match_rate	match_rate_up	match_rate_low	filled_prog	filled_prog_up	filled_prog_low	nci_mean	nci_up	nci_low
Academic General Pediatrics	81.8	97.9	65.7	43.3	61.1	25.6	0.634	0.738	0.530
Adolescent Medicine	86.6	91.1	82.1	69.3	75.9	62.7	0.911	1.094	0.727
Cardiology	76.2	78.0	74.4	91.7	93.8	89.7	1.319	1.405	1.232
Child Abuse	81.8	89.4	74.2	52.0	60.1	44.0	0.718	0.891	0.544
Critical Care Medicine	88.5	89.8	87.1	80.7	83.5	78.0	0.985	1.132	0.839
Developmental and Behavioral Pediatrics	85.9	89.7	82.1	61.0	66.4	55.6	0.806	0.924	0.689
Emergency Medicine	77.8	79.4	76.2	93.7	95.3	92.2	1.306	1.389	1.223
Endocrinology	93.6	95.7	91.5	56.7	61.1	52.3	0.682	0.782	0.582
Gastroenterology	78.2	80.4	75.9	89.7	92.0	87.3	1.270	1.367	1.172
Hematology/Oncology	87.3	88.7	85.9	83.0	85.6	80.5	1.034	1.113	0.955
Hospital Medicine	71.4	76.0	66.8	90.0	94.0	85.9	1.392	1.642	1.142
Infectious Diseases	91.3	94.2	88.4	52.4	57.2	47.5	0.649	0.730	0.567
Neonatal-Perinatal Medicine	91.1	92.1	90.0	78.0	80.4	75.6	0.945	1.024	0.866
Nephrology	92.4	95.2	89.7	40.9	45.6	36.3	0.499	0.591	0.407
Pulmonology	91.9	94.4	89.5	51.6	56.0	47.1	0.625	0.697	0.554
Rheumatology	85.0	89.0	81.0	55.9	61.2	50.5	0.731	0.792	0.671
Sports Medicine	30.0	33.4	26.6	92.9	97.0	88.9	3.254	3.626	2.882
Transplant Hepatology	100.0	100.0	100.0	71.4	95.1	47.8	0.865	NA	NA

Table 5

Correlation between NCI and Other Metrics

# Table 5: correlation (cor() default method) between NCI and each other
# metric, computed over all rows of df. use = "complete.obs" drops the rows
# in which either variable of that particular pair is missing.
df %>%
  dplyr::summarise(
    match_rate = cor(nci, n_match_all / n_apps_all),
    fill_prog = cor(nci, (n_prog - unfill_prog) / n_prog),
    ranks_per_app = cor(nci, rank_us / n_apps_us, use = "complete.obs"),
    us_match_rate = cor(nci, n_match_us / n_apps_us, use = "complete.obs"),
    first_choice_match = cor(nci, first / n_match_all, use = "complete.obs"),
    first_choice_apps = cor(nci, first / n_apps_all, use = "complete.obs"),
    top_three_match = cor(nci, (first + second + third) / n_match_all,
                          use = "complete.obs"),
    top_three_apps = cor(nci, (first + second + third) / n_apps_all,
                         use = "complete.obs")
  ) %>%
  # Transpose the one-row summary into one row per metric; the metric names
  # become the row names of the resulting matrix.
  t() %>%
  # The transposed matrix has no column names, which makes kable() warn
  # "The table should have a header (column names)"; name the value column.
  knitr::kable(col.names = "correlation")
#> Warning in kable_pipe(x = structure(c("match_rate", "fill_prog",
#> "ranks_per_app", : The table should have a header (column names)

match_rate	-0.904
fill_prog	0.657
ranks_per_app	-0.256
us_match_rate	-0.896
first_choice_match	-0.470
first_choice_apps	-0.825
top_three_match	-0.441
top_three_apps	-0.919

Pediatric Match Analysis