Create and validate a predictive model to understand a medical student’s chances of matching into obstetrics and gynecology residency.
invisible(gc())

# NOTE(review): the two comment lines below are a stray prose fragment that was
# fused onto the front of the `train_match` assignment, which made that
# statement unparseable. The text is kept verbatim as a comment; it looks like
# the tail of a model-naming note - confirm against the original document.
# .fit
# e.g. ‘logit.fit’)

#' Read one applicant data set and prepare it for modelling.
#'
#' @param path Path to a CSV file that contains at least the columns `X`,
#'   `Year`, `Location` and `Type_of_medical_school`.
#' @return A data frame without the `X`, `Year` and `Location` columns, in
#'   which the combined school type "Osteopathic School,International School"
#'   has been recoded to "Osteopathic School".
load_match_data <- function(path) {
  dat <- dplyr::select(read.csv(file = path), -X, -Year, -Location)

  # Recode the "Type_of_medical_school" variable.
  # The comparison is done on character values so the recode stays correct
  # even if the column arrives as a factor (ifelse() would otherwise return
  # the factor's integer codes for the rows that are left untouched).
  school_type <- as.character(dat$Type_of_medical_school)
  dat$Type_of_medical_school <- ifelse(
    school_type == "Osteopathic School,International School",
    "Osteopathic School",
    school_type
  )

  dat
}

# The training and test files go through exactly the same preparation.
train_match <- load_match_data("output/csv/train_dat_2017_2018_years.csv")
test_match <- load_match_data("output/csv/test_dat_2019_2020_years.csv")
We generated the nomogram to provide a pre-Match, personalized
estimate of the chance of matching into OBGYN residency at all
institutions. Points in the nomogram were assigned in proportion
to the effect sizes in the multivariable logistic regression
model. The nomogram was based on pre-application variables, including
pre-Match educational preparation, research accomplishments, and
applicant demographics.
Points were allocated for each variable, summed, and then used to
calculate a medical student-specific, pre-application chance of
matching. The nomogram illustrates the strength of association between
the predictors and the outcome, as well as the nonlinear associations of
age and count of poster presentations with matching.
# Describe the columns of the training data with rms::datadist() and register
# that description through the `datadist` option. This is done before the
# model is fitted and before the nomogram is built from it.
t.data <- rms::datadist(train_match)
options(datadist = t.data)
# Logistic regression (rms::lrm) of Match_Status on every other column of
# train_match (`~ .`).
fit <- rms::lrm(formula = Match_Status ~ .,
data = train_match)
# Build the nomogram object for the fitted model `fit`.
# Arguments named after a predictor give the values to mark on that
# predictor's axis; the remaining arguments control the probability axis and
# the point scale.
rms_nomo <- rms::nomogram(
  fit = fit,
  # Axis values per predictor
  Age = c(25, 30, 35, 40, 45, 50),
  number_of_applicant_first_author_publications = c(0, 4, 8, 10),
  Count_of_Poster_Presentation = c(0, 10, 20),
  Count_of_Oral_Presentation = c(0, 10, 20, 30),
  Count_of_Peer_Reviewed_Journal_Articles_Abstracts_Other_than_Published = c(0, 10, 20, 40),
  total_OBGYN_letter_writers = c(0, 2, 4),
  reco_count = c(2, 4),
  Count_of_Peer_Reviewed_Journal_Articles_Abstracts = c(0, 10, 20),
  Volunteer_exp_count = c(0, 10),
  work_exp_count = c(0, 10),
  Research_exp_count = c(0, 5, 10, 15),
  # Show the inverse-logit of the linear predictor (a probability) with ticks
  # at the listed probabilities
  fun = plogis,
  fun.at = c(0.001, 0.01, 0.05, seq(0.2, 0.8, by = 0.2), 0.95, 0.99, 0.999),
  funlabel = "Chance of Matching in OBGYN Residency",
  # No separate linear-predictor axis
  lp = FALSE,
  # Spelled out as FALSE: `F` is an ordinary variable that can be reassigned
  abbrev = FALSE,
  varname.label.sep = ": ",
  est.all = TRUE,
  minlength = 1,
  conf.int = FALSE,
  verbose = FALSE,
  # Upper end of the points scale
  maxscale = 100
)
# Looks good on the screen
## global settings
# Graphical parameters are stored per device. Settings made before tiff() is
# called only affect the device that is current at that moment, so they are
# applied a second time once the TIFF device is open. The first call is kept
# so that the state of the current (on-screen) device stays as it was for the
# plotting code that follows.
nomogram_par <- list(mfrow = c(1, 1), mar = c(0, 0, 0, 0), font = 7, font.axis = 7) #, cex.axis = .75)
par(nomogram_par)

tiff("output/fig/nomogram_from_04_nomogram.tiff", units = "in", width = 8, height = 6, res = 800)
# Same layout, margin and font settings, this time on the TIFF device itself
par(nomogram_par)
plot(
  rms_nomo,
  font.lab = 7,
  lplabel = "Linear Predictor",
  cex.sub = 0.3, cex.axis = 0.4, cex.main = 1,
  cex.var = 0.5, # Size of variable names
  cex.lab = 0.4,
  ps = 10,
  xfrac = 0.2,
  conf.space = c(0.1, 0.5),
  label.every = 1,
  col.grid = gray(c(0.8, 0.95)),
  total.sep.page = FALSE,
  cap.labels = TRUE,
  total.points.label = "Sum of all points",
  which = "Match_Status"
)
# Close the TIFF device so the file is written out
dev.off()
#> svg
#> 2
# Looks good on the TIFF: Not that great.
## global settings
# One plot per page and all four margins set to zero on the current device
par(mfrow = c(1, 1), mar = c(0, 0, 0, 0))
#tiff("output/fig/nomogram_from_04_nomogram.tiff", units="in", width=8, height=6, res=800)
# Draw the nomogram on the current device (the TIFF export above/below is
# commented out for this variant).
plot(
  rms_nomo,
  # Font code for labels (a font selector, not a size)
  font.lab = 7,
  # Label for the linear predictor axis
  lplabel = "Linear Predictor",
  # Character expansion factors: subtitle, axis annotation, main title
  cex.sub = 0.3,
  cex.axis = 0.4,
  cex.main = 1,
  # Character expansion for the variable names
  cex.var = 0.5,
  # Character expansion for labels
  cex.lab = 0.3,
  # Point size of text
  ps = 10,
  # Share of the horizontal space reserved for the axis titles
  xfrac = 0.5,
  # Label every tick mark
  label.every = 1,
  # Two grey levels for the reference grid lines
  col.grid = gray(c(0.8, 0.95)),
  # NOTE(review): `which` is handed on with the other extra arguments; its
  # effect on plot.nomogram is not visible from this file - confirm.
  which = "Match_Status"
)
#dev.off()
#https://www.kaggle.com/pjmcintyre/titanic-first-kernel#final-checks
#' Build and draw a probability-scale nomogram for a fitted rms model.
#'
#' Prints a sanity-check message, constructs the nomogram with
#' `rms::nomogram()` and draws it on the current graphics device.
#'
#' @param df A fitted rms model object, for example the result of
#'   `rms::lrm()`. Despite the name, this is not a data frame.
#' @return The value returned by `plot()` for the nomogram (`NULL` in the
#'   recorded run). The function is called for its plotting side effect.
tm_nomogram_prep <- function(df) {
  # Fail early with a clear message instead of an obscure error from inside
  # rms::nomogram() when something other than an rms fit is passed in.
  if (!inherits(df, "rms")) {
    stop(
      "`df` must be a fitted rms model (e.g. from rms::lrm()), not an object of class '",
      class(df)[1], "'.",
      call. = FALSE
    )
  }

  # NOTE(review): seed kept from the original; nothing below looks stochastic,
  # so it may be removable - confirm before dropping it because it resets the
  # session's RNG state.
  set.seed(1978)
  print("Function Sanity Check: Creation of Nomogram")
  # Kept so the rms package stays attached for code that runs after this call
  library(rms)

  test <- rms::nomogram(
    df,
    # lp.at = seq(-3, 4, by = 0.5),
    # Show the inverse-logit of the linear predictor (a probability)
    fun = plogis,
    fun.at = c(0.001, 0.01, 0.05, seq(0.2, 0.8, by = 0.2), 0.95, 0.99, 0.999),
    funlabel = "Chance of Matching in OBGYN",
    lp = FALSE,
    # conf.int = c(0.1, 0.7),
    # Spelled out as FALSE: `F` is an ordinary variable that can be reassigned
    abbrev = FALSE,
    minlength = 9
  )

  tm_plot <- plot(
    test,
    lplabel = "Linear Predictor",
    cex.sub = 0.3, cex.axis = 0.4, cex.main = 1, cex.lab = 0.2, ps = 10, xfrac = 1,
    label.every = 1,
    col.grid = gray(c(0.8, 0.95)),
    which = "Match_Status"
  )

  tm_plot
}
# Draw the nomogram for the fitted model `fit` with the helper defined above;
# the two `#>` lines are the output recorded when the script was run.
tm_nomogram_prep(fit)
#> [1] "Function Sanity Check: Creation of Nomogram"
#> NULL
# End-of-session housekeeping, currently disabled (cluster shutdown, garbage
# collection, session info, audible notification, quit when non-interactive):
# parallel::stopCluster(cl)
# #grDevices::dev.off()
# invisible(gc())
# sessioninfo::session_info()
# beepr::beep(sound = 4)
# if (!interactive())
# q("no")
# List the packages of the session, and R itself (include_R = TRUE), with
# versions and citations; the recorded output follows as `#>` lines.
report::report_packages(include_R = TRUE)
#> - rmarkdown (version 2.16; Allaire J et al., 2022)
#> - visNetwork (version 2.1.0; Almende B.and Contributors et al., 2021)
#> - doMC (version 1.3.8; Analytics R, Weston S, 2022)
#> - iterators (version 1.0.14; Analytics R, Weston S, 2022)
#> - DescTools (version 0.99.46; Andri et mult. al. S, 2022)
#> - countrycode (version 1.4.0; Arel-Bundock V et al., 2018)
#> - PASWR (version 1.3; Arnholt A, 2022)
#> - ggthemes (version 4.2.4; Arnold J, 2021)
#> - RANN (version 2.6.1; Arya S et al., 2019)
#> - rsconnect (version 0.8.27; Atkins A et al., 2022)
#> - ezknitr (version 0.6; Attali D, 2016)
#> - shinyjs (version 2.1.0; Attali D, 2021)
#> - beepr (version 1.3; Bååth R, 2018)
#> - magrittr (version 2.0.3; Bache S, Wickham H, 2022)
#> - Matrix (version 1.5.1; Bates D et al., 2022)
#> - R.methodsS3 (version 1.8.2; Bengtsson H, 2003)
#> - quanteda (version 3.2.3; Benoit K et al., 2018)
#> - rgdal (version 1.5.32; Bivand R et al., 2022)
#> - grpreg (version 3.4.0; Breheny P, Huang J, 2015)
#> - rmda (version 1.6; Brown M, 2018)
#> - funModeling (version 1.9.4; Casas P, 2020)
#> - shiny (version 1.7.2; Chang W et al., 2022)
#> - xgboost (version 1.6.0.1; Chen T et al., 2022)
#> - epiDisplay (version 3.5.0.2; Chongsuvivatwong V, 2022)
#> - summarytools (version 1.0.1; Comtois D, 2022)
#> - doParallel (version 1.0.17; Corporation M, Weston S, 2022)
#> - infer (version 1.0.3; Couch SP et al., 2021)
#> - gitcreds (version 0.1.2; Csárdi G, 2022)
#> - callr (version 3.7.3.9000; Csárdi G, Chang W, 2023)
#> - progress (version 1.2.2; Csárdi G, FitzJohn R, 2019)
#> - remotes (version 2.4.2; Csárdi G et al., 2021)
#> - DataExplorer (version 0.8.2; Cui B, 2020)
#> - qlcMatrix (version 0.9.7; Cysouw M, 2018)
#> - correlationfunnel (version 0.2.0; Dancho M, 2020)
#> - tidyquant (version 1.0.5; Dancho M, Vaughan D, 2022)
#> - pander (version 0.6.5; Daróczi G, Tsegelskyi R, 2022)
#> - caretEnsemble (version 2.0.1; Deane-Mayer ZA, Knowles JE, 2019)
#> - data.table (version 1.14.2; Dowle M, Srinivasan A, 2021)
#> - BH (version 1.78.0.0; Eddelbuettel D et al., 2021)
#> - tidylog (version 1.0.2; Elbers B, 2020)
#> - janitor (version 2.1.0; Firke S, 2021)
#> - english (version 1.2.6; Fox J et al., 2021)
#> - car (version 3.1.0; Fox J, Weisberg S, 2019)
#> - carData (version 3.0.5; Fox J et al., 2022)
#> - glmnet (version 4.1.4; Friedman J et al., 2010)
#> - viridis (version 0.6.2; Garnier et al., 2021)
#> - viridisLite (version 0.4.1; Garnier et al., 2022)
#> - fansi (version 1.0.3; Gaslam B, 2022)
#> - perturbR (version 0.1.3; Gates K et al., 2019)
#> - dtw (version 1.23.1; Giorgino T, 2009)
#> - mltools (version 0.3.5; Gorman B, 2018)
#> - gbm (version 2.1.8.1; Greenwell B et al., 2022)
#> - vip (version 0.3.2; Greenwell BM, Boehmke BC, 2020)
#> - lubridate (version 1.8.0; Grolemund G, Wickham H, 2011)
#> - gss (version 2.2.3; Gu C, 2014)
#> - Metrics (version 0.1.4; Hamner B, Frasco M, 2018)
#> - Hmisc (version 4.7.1; Harrell Jr F, 2022)
#> - rms (version 6.3.0; Harrell Jr FE, 2022)
#> - RSelenium (version 1.7.9; Harrison J, 2022)
#> - earth (version 5.3.1; Hastie SMDfmbT, wrapper. RTUAMFuwTLl, 2021)
#> - exploratory (version 6.12.3.5; Hayashi H et al., 2023)
#> - arsenal (version 3.6.3; Heinzen E et al., 2021)
#> - anonymizer (version 0.2.2; Hendricks P, 2022)
#> - rlang (version 1.0.6.9000; Henry L, Wickham H, 2023)
#> - glue (version 1.6.2.9000; Hester J, Bryan J, 2023)
#> - odbc (version 1.3.3; Hester J, Wickham H, 2021)
#> - fs (version 1.5.2; Hester J et al., 2021)
#> - stargazer (version 5.2.3; Hlavac M, 2022)
#> - MatchIt (version 4.4.0; Ho DE et al., 2011)
#> - Rmisc (version 1.5.1; Hope RM, 2022)
#> - slam (version 0.1.50; Hornik K et al., 2022)
#> - discrim (version 1.0.0; Hvitfeldt E, Kuhn M, 2022)
#> - lime (version 0.5.3; Hvitfeldt E et al., 2022)
#> - DiagrammeRsvg (version 0.1; Iannone R, 2016)
#> - DiagrammeR (version 1.0.9; Iannone R, 2022)
#> - mctest (version 1.3.1; Imdad MU, Aslam M, 2020)
#> - plotrix (version 3.8.2; J L, 2006)
#> - pscl (version 1.5.5; Jackman S, 2020)
#> - DynNom (version 5.0.2; Jalali A et al., 2022)
#> - ggformula (version 0.10.2; Kaplan D, Pruim R, 2022)
#> - fastDummies (version 1.6.3; Kaplan J, 2020)
#> - kernlab (version 0.9.31; Karatzoglou A et al., 2022)
#> - factoextra (version 1.0.7; Kassambara A, Mundt F, 2020)
#> - humaniformat (version 0.6.0; Keyes O, 2016)
#> - urltools (version 1.7.3; Keyes O et al., 2019)
#> - ppcor (version 1.1; Kim S, 2015)
#> - SparseM (version 1.81; Koenker R, 2021)
#> - moments (version 0.14.1; Komsta L, Novomestky F, 2022)
#> - caret (version 6.0.93; Kuhn M, 2022)
#> - modeldata (version 1.0.1; Kuhn M, 2022)
#> - tune (version 1.0.0; Kuhn M, 2022)
#> - workflowsets (version 1.0.0; Kuhn M, Couch S, 2022)
#> - dials (version 1.0.0; Kuhn M, Frick H, 2022)
#> - AppliedPredictiveModeling (version 1.1.7; Kuhn M, Johnson K, 2018)
#> - parsnip (version 1.0.1; Kuhn M, Vaughan D, 2022)
#> - yardstick (version 1.1.0; Kuhn M et al., 2022)
#> - tidymodels (version 1.0.0; Kuhn M, Wickham H, 2020)
#> - recipes (version 1.0.1; Kuhn M, Wickham H, 2022)
#> - Boruta (version 7.0.0; Kursa MB, Rudnicki WR, 2010)
#> - coefplot (version 1.2.8; Lander JP, 2022)
#> - ezkable (version 0.0.0.9000; Lang G, 2022)
#> - backports (version 1.4.1; Lang M, R Core Team, 2021)
#> - ezplot (version 1.0.0; Lang' ', 2022)
#> - mlbench (version 2.1.3; Leisch F, Dimitriadou E, 2021)
#> - ResourceSelection (version 0.3.5; Lele SR et al., 2019)
#> - randomForest (version 4.7.1.1; Liaw A, Wiener M, 2002)
#> - corrmorant (version 0.0.0.9007; Link R, 2020)
#> - sjmisc (version 2.8.9; Lüdecke D, 2018)
#> - naivebayes (version 0.9.7; Majka M, 2019)
#> - robotstxt (version 0.7.13; Meissner P, Ren K, 2020)
#> - scoring (version 0.6; Merkle EC, Steyvers M, 2013)
#> - proxy (version 0.4.27; Meyer D, Buchta C, 2022)
#> - e1071 (version 1.7.11; Meyer D et al., 2022)
#> - foreach (version 1.5.2; Microsoft, Weston S, 2022)
#> - plotmo (version 3.6.2; Milborrow S, 2022)
#> - rpart.plot (version 3.1.1; Milborrow S, 2022)
#> - leaps (version 3.1; Miller TLboFcbA, 2020)
#> - here (version 1.0.1; Müller K, 2020)
#> - hms (version 1.1.2; Müller K, 2022)
#> - tibble (version 3.1.8; Müller K, Wickham H, 2022)
#> - RSQLite (version 2.2.17; Müller K et al., 2022)
#> - RColorBrewer (version 1.1.3; Neuwirth E, 2022)
#> - bit (version 4.0.4; Oehlschlägel J, Ripley B, 2020)
#> - bit64 (version 4.0.5; Oehlschlägel J, Silvestri L, 2020)
#> - magick (version 2.7.3; Ooms J, 2021)
#> - sp (version 1.5.0; Pebesma EJ, Bivand RS, 2005)
#> - ggforce (version 0.3.4; Pedersen T, 2022)
#> - shinyWidgets (version 0.7.3; Perrier V et al., 2022)
#> - utf8 (version 1.2.2; Perry PO, 2021)
#> - ipred (version 0.9.13; Peters A, Hothorn T, 2022)
#> - PerformanceAnalytics (version 2.0.4; Peterson BG, Carl P, 2020)
#> - bitops (version 1.0.7; port SobSDiR et al., 2021)
#> - InformationValue (version 1.2.3; Prabhakaran S, 2016)
#> - mosaicData (version 0.20.3; Pruim R et al., 2022)
#> - mosaic (version 1.8.4; Pruim R et al., 2017)
#> - foreign (version 0.8.83; R Core Team, 2022)
#> - R (version 4.2.2; R Core Team, 2022)
#> - psych (version 2.2.5; Revelle W, 2022)
#> - textshape (version 1.7.3; Rinker TW, 2021)
#> - pROC (version 1.18.0; Robin X et al., 2011)
#> - broom (version 1.0.1; Robinson D et al., 2022)
#> - sparsesvd (version 0.2.1; Rohde D et al., 2022)
#> - inspectdf (version 0.0.12; Rushworth A, 2022)
#> - xts (version 0.12.1; Ryan JA, Ulrich JM, 2020)
#> - quantmod (version 0.4.20; Ryan JA, Ulrich JM, 2022)
#> - plotROC (version 2.3.0; Sachs MC, 2017)
#> - lattice (version 0.20.45; Sarkar D, 2008)
#> - corpcor (version 1.6.10; Schafer J et al., 2021)
#> - openxlsx (version 4.2.5; Schauberger P, Walker A, 2021)
#> - plotly (version 4.10.0; Sievert C, 2020)
#> - flexdashboard (version 0.6.0; Sievert C et al., 2022)
#> - rsample (version 1.1.0; Silge J et al., 2022)
#> - tidytext (version 0.3.4; Silge J, Robinson D, 2016)
#> - ROCR (version 1.0.11; Sing T et al., 2005)
#> - TeachingDemos (version 2.12; Snow G, 2020)
#> - compareGroups (version 4.5.1; Subirana I et al., 2014)
#> - labeling (version 0.4.2; Talbot, J, 2020)
#> - XML (version 3.99.0.10; Temple Lang D, 2022)
#> - survival (version 3.4.0; Therneau T, 2022)
#> - rpart (version 4.1.19; Therneau T, Atkinson B, 2022)
#> - naniar (version 0.6.1; Tierney N et al., 2021)
#> - caTools (version 1.18.2; Tuszynski J, 2021)
#> - TTR (version 0.24.3; Ulrich J, 2021)
#> - RcppRoll (version 0.3.0; Ushey K, 2018)
#> - renv (version 0.15.5; Ushey K, 2022)
#> - rstudioapi (version 0.14; Ushey K et al., 2022)
#> - mice (version 3.14.0; van Buuren S, Groothuis-Oudshoorn K, 2011)
#> - workflows (version 1.0.0; Vaughan D, 2022)
#> - MASS (version 7.3.58.1; Venables WN, Ripley BD, 2002)
#> - nnet (version 7.3.18; Venables WN, Ripley BD, 2002)
#> - tigris (version 1.6.1; Walker K, 2022)
#> - skimr (version 2.1.4; Waring E et al., 2022)
#> - corrplot (version 0.92; Wei T, Simko V, 2021)
#> - klaR (version 1.7.1; Weihs C et al., 2005)
#> - munsell (version 0.5.0; Wickham C, 2018)
#> - reshape2 (version 1.4.4; Wickham H, 2007)
#> - plyr (version 1.8.7; Wickham H, 2011)
#> - ggplot2 (version 3.4.0; Wickham H, 2016)
#> - forcats (version 0.5.2; Wickham H, 2022)
#> - stringr (version 1.4.1; Wickham H, 2022)
#> - tidyverse (version 1.3.2; Wickham H et al., 2019)
#> - readxl (version 1.4.1; Wickham H, Bryan J, 2022)
#> - usethis (version 2.1.6; Wickham H et al., 2022)
#> - dplyr (version 1.0.10; Wickham H et al., 2022)
#> - tidyr (version 1.2.1; Wickham H, Girlich M, 2022)
#> - purrr (version 1.0.1; Wickham H, Henry L, 2023)
#> - vctrs (version 0.5.1; Wickham H et al., 2022)
#> - readr (version 2.1.2; Wickham H et al., 2022)
#> - devtools (version 2.4.4; Wickham H et al., 2022)
#> - scales (version 1.2.1; Wickham H, Seidel D, 2022)
#> - cowplot (version 1.1.1; Wilke C, 2020)
#> - rattle (version 5.5.1; Williams GJ, 2011)
#> - corrgram (version 1.14; Wright K, 2021)
#> - ranger (version 0.14.1; Wright MN, Ziegler A, 2017)
#> - timeDate (version 4021.104; Wuertz D et al., 2022)
#> - knitr (version 1.40; Xie Y, 2022)
#> - tinytex (version 0.41; Xie Y, 2022)
#> - highr (version 0.9; Xie Y, Qiu Y, 2021)
#> - MLmetrics (version 1.1.1; Yan Y, 2016)
#> - tableone (version 0.13.2; Yoshida K, Bartel A, 2022)
#> - Formula (version 1.2.4; Zeileis A, Croissant Y, 2010)
#> - zoo (version 1.8.11; Zeileis A, Grothendieck G, 2005)
#> - lmtest (version 0.9.40; Zeileis A, Hothorn T, 2002)
#> - kableExtra (version 1.3.4; Zhu H, 2021)