library(simplFeedbackPerformance)
library(magrittr)
#> Warning: package 'magrittr' was built under R version 4.0.3

We should first build a reusable but versatile function so that changing our data parameters is easy.
The first step is reducing the complete data set to our desired sample. This filters for complete, paired evaluations where the subject is “Trainee” and a “Categorical” resident in a PGY 5th year of training. Only core procedures are included.
We would like to be able to change the PGY years included, so I will include a pgy parameter in our function:
# Reduce the complete evaluation data to the analysis sample.
#
# Keeps only rows where `status` and `paired` are TRUE, the procedure is not
# "Other", the subject is a "Trainee" of type "CATEGORICAL", the trainee's PGY
# is one of `pgy`, and the procedure name appears in `coredf$name`.
#
# df:  data frame containing the columns referenced below.
# pgy: numeric vector of PGY years to keep (e.g. 5 or 1:5).
#
# NOTE(review): `coredf` is not defined in this excerpt; presumably it is a
# data set provided by an attached package -- confirm.
filter_data <- function(df, pgy) {
  df %>%
    dplyr::filter(
      status,
      procName != "Other",
      subjectRole == "Trainee",
      traineeType == "CATEGORICAL",
      paired,
      # Convert through as.character() so that a factor column yields its
      # labels ("1", "2", ...) rather than its internal integer codes.
      as.numeric(as.character(traineePGY)) %in% pgy,
      procName %in% coredf$name
    )
}
# First iteration of the processing pipeline: only the sample filter is
# applied.
processDF <- function(df, pgy) {
  filter_data(df, pgy)
}

Additionally, I’d like the ability (if desired) to select for only those residents that initiate evaluations:
Our proposed model is of the form DV ~ ENGAGEMENT + MONTH + COMPLEXITY + COHORT + (1|SUBJECT) + (1|PROC) + (1|PROGRAM) + (1|RATER). We have some explanatory variables that need to be built first.
month

First, we need to create a function to calculate month, which is the number of months since the beginning of the academic year, defined as July 1st. Therefore, month == 1 is equivalent to July, month == 2 is equivalent to August, and so forth. It should be easy to switch between month being an ordered and an unordered factor, but the default will be an unordered factor. When unordered, the reference level will be 1 (July). I will include a parameter called month_ordered.
# Add `month`, the academic month of `date`, as a factor.
#
# Calendar months are shifted so that July becomes 1, August 2, ... and
# June 12. The factor is ordered when `month_ordered` is TRUE.
#
# df:            data frame with a `date` column.
# month_ordered: logical, passed to factor(ordered = ).
create_month <- function(df, month_ordered) {
  dplyr::mutate(
    df,
    # (calendar month - 7) modulo 12, plus 1, maps 7..12 to 1..6 and
    # 1..6 to 7..12.
    month = factor(
      (lubridate::month(date) - 7) %% 12 + 1,
      ordered = month_ordered
    )
  )
}
# Pipeline so far: sample filter, the `filter_enthusiatic_residents()` step
# (defined outside this excerpt), then the academic-month factor.
processDF <- function(df, pgy, enthusiastic = FALSE, month_ordered = FALSE) {
  sample_df <- filter_data(df, pgy)
  sample_df <- filter_enthusiatic_residents(sample_df, enthusiastic)
  create_month(sample_df, month_ordered)
}

complexity

In order to use complexity, we will factor it and set the reference level equal to 2 (average):
# Recode `complexity` as an unordered factor whose first (reference) level
# is "2", followed by "1" and "3".
#
# df: data frame with a `complexity` column.
create_complexity <- function(df) {
  dplyr::mutate(
    df,
    complexity = factor(
      complexity,
      levels = c("2", "1", "3"),
      ordered = FALSE
    )
  )
}
# Pipeline so far: sample filter, the `filter_enthusiatic_residents()` step
# (defined outside this excerpt), academic month, then complexity.
processDF <- function(df, pgy, enthusiastic = FALSE, month_ordered = FALSE) {
  sample_df <- filter_data(df, pgy)
  sample_df <- filter_enthusiatic_residents(sample_df, enthusiastic)
  sample_df <- create_month(sample_df, month_ordered)
  create_complexity(sample_df)
}

cohort

cohort represents the year group of a given group of subjects (because a 2015 PGY-5 is not the same as a 2016 PGY-5, etc.). Because we are using academic, and not calendar, years, this will be slightly more complicated. I will create a function that determines the academic year of a given date and creates a variable called cohort. The function will require a parameter end that is a date of the form MM-DD that indicate the last day of an academic year. In addition, just like month, I would like to be able to specify the ability to order the factor, although by default it will be unordered with the reference group set to 2015.
# Validate that `date` is a single "MM-DD" string naming a real calendar day.
#
# Stops with the message 'date must take the form of "MM-DD"' when `date` is
# not a length-one, non-missing character string of two digits, a dash and two
# digits, or when the month/day combination cannot exist (e.g. "13-45",
# "02-30"). Returns NULL invisibly on success.
#
# date: candidate month-day string, e.g. "06-30".
valiDATE <- function(date) {
  is_scalar_string <- is.character(date) && length(date) == 1L && !is.na(date)
  has_mm_dd_form <- is_scalar_string &&
    grepl("^\\d{2}-\\d{2}$", date, perl = TRUE)
  # Parse against the leap year 2000 so that "02-29" is accepted while
  # impossible combinations are rejected.
  is_real_day <- has_mm_dd_form &&
    !is.na(as.Date(paste0("2000-", date), format = "%Y-%m-%d"))
  stopifnot(`date must take the form of "MM-DD"` = is_real_day)
}
# Academic year in which each element of `x` falls.
#
# A date after the `end` month-day of its own calendar year belongs to the
# academic year named after that calendar year; a date on or before it belongs
# to the previous one. With end = "06-30", 2016-07-01 gives 2016 and
# 2016-06-30 gives 2015.
#
# x:   vector of dates.
# end: "MM-DD" string giving the last day of an academic year; checked by
#      valiDATE().
#
# Returns a double vector the same length as `x`; NA dates give NA.
academic_year <- function(x, end) {
  valiDATE(end)
  year_for_one_date <- function(d) {
    # A missing date has no academic year; propagate NA rather than letting
    # `if` fail on a missing condition.
    if (is.na(d)) {
      return(NA_real_)
    }
    cal_year <- lubridate::year(d)
    # Build the cut-off in the date's own calendar year so both day-of-year
    # values come from the same (leap or non-leap) year.
    cutoff <- glue::glue("{cal_year}-{end}")
    if (lubridate::yday(d) > lubridate::yday(cutoff)) {
      cal_year
    } else {
      cal_year - 1
    }
  }
  purrr::map_dbl(x, year_for_one_date)
}
# Add `cohort`, the academic year of `date`, as a factor.
#
# df:           data frame with a `date` column.
# end:          "MM-DD" last day of an academic year, passed to
#               academic_year().
# year_ordered: logical, passed to factor(ordered = ).
# cohort_years: academic years to use as factor levels, in order; the first
#               is the reference level when the factor is unordered. Rows
#               whose academic year is not listed get an NA cohort.
create_cohort <- function(df, end = "06-30", year_ordered = FALSE,
                          cohort_years = c(2015, 2016, 2017, 2018, 2019)) {
  df %>%
    dplyr::mutate(
      cohort = factor(
        academic_year(date, end),
        levels = cohort_years,
        ordered = year_ordered
      )
    )
}
# Pipeline so far: sample filter, the `filter_enthusiatic_residents()` step
# (defined outside this excerpt), academic month, complexity, then cohort.
processDF <- function(df, pgy, enthusiastic = FALSE, month_ordered = FALSE, end = "06-30", year_ordered = FALSE) {
  sample_df <- filter_data(df, pgy)
  sample_df <- filter_enthusiatic_residents(sample_df, enthusiastic)
  sample_df <- create_month(sample_df, month_ordered)
  sample_df <- create_complexity(sample_df)
  create_cohort(sample_df, end, year_ordered)
}

engagement

Next, we should create the function to calculate engagement. engagement will be a time-invariant measure of evaluations completed (number of evaluations completed, not number of cases) for each individual per PGY year. It will be scaled across all distinct values of volume per PGY year.
# Add `engagement`: log10 of the number of rows sharing the same `subjectID`
# and `traineePGY`. Every row of a subject/PGY pair receives the same value.
#
# df: data frame with `subjectID` and `traineePGY` columns.
#
# The result is returned ungrouped so that the grouping used for the count
# does not carry over into later processing steps.
create_engagement <- function(df) {
  df %>%
    dplyr::group_by(subjectID, traineePGY) %>%
    dplyr::mutate(engagement = log10(dplyr::n())) %>%
    dplyr::ungroup()
}
# Pipeline so far: sample filter, the `filter_enthusiatic_residents()` step
# (defined outside this excerpt), academic month, complexity, cohort, then
# engagement.
processDF <- function(df, pgy, enthusiastic = FALSE, month_ordered = FALSE, end = "06-30", year_ordered = FALSE) {
  sample_df <- filter_data(df, pgy)
  sample_df <- filter_enthusiatic_residents(sample_df, enthusiastic)
  sample_df <- create_month(sample_df, month_ordered)
  sample_df <- create_complexity(sample_df)
  sample_df <- create_cohort(sample_df, end, year_ordered)
  create_engagement(sample_df)
}

performance

The outcome variable, performance is to be simply converted to an integer value.
# Overwrite `performance` with its as.integer() conversion.
#
# df: data frame with a `performance` column.
create_performance <- function(df) {
  dplyr::mutate(df, performance = as.integer(performance))
}
# Pipeline so far: sample filter, the `filter_enthusiatic_residents()` step
# (defined outside this excerpt), academic month, complexity, cohort,
# engagement, then the integer performance outcome.
processDF <- function(df, pgy, enthusiastic = FALSE, month_ordered = FALSE, end = "06-30", year_ordered = FALSE) {
  sample_df <- filter_data(df, pgy)
  sample_df <- filter_enthusiatic_residents(sample_df, enthusiastic)
  sample_df <- create_month(sample_df, month_ordered)
  sample_df <- create_complexity(sample_df)
  sample_df <- create_cohort(sample_df, end, year_ordered)
  sample_df <- create_engagement(sample_df)
  create_performance(sample_df)
}

autonomy

The outcome variable, autonomy is to be supervision simply converted to an integer value.
# Add `autonomy`, the as.integer() conversion of the `supervision` column.
#
# df: data frame with a `supervision` column.
create_autonomy <- function(df) {
  dplyr::mutate(df, autonomy = as.integer(supervision))
}
# Final processing pipeline. Applies, in order: the sample filter, the
# `filter_enthusiatic_residents()` step (defined outside this excerpt),
# academic month, complexity, cohort, engagement, and the integer
# performance and autonomy outcomes.
#
# df:            raw evaluation data frame.
# pgy:           PGY years to keep, passed to filter_data().
# enthusiastic:  passed to filter_enthusiatic_residents().
# month_ordered: passed to create_month().
# end:           "MM-DD" last day of an academic year, passed to
#                create_cohort().
# year_ordered:  passed to create_cohort().
processDF <- function(df, pgy, enthusiastic = FALSE, month_ordered = FALSE, end = "06-30", year_ordered = FALSE) {
  sample_df <- filter_data(df, pgy)
  sample_df <- filter_enthusiatic_residents(sample_df, enthusiastic)
  sample_df <- create_month(sample_df, month_ordered)
  sample_df <- create_complexity(sample_df)
  sample_df <- create_cohort(sample_df, end, year_ordered)
  sample_df <- create_engagement(sample_df)
  sample_df <- create_performance(sample_df)
  create_autonomy(sample_df)
}

Now we have a final function that we can use to quickly create sample data sets for our models.
These models will utilize the following regression:
DV ~ ENGAGEMENT + MONTH + COMPLEXITY + TRAINEEPGY + (1|SUBJECT) + (1|PROC) + (1|PROGRAM) + (1|RATER)
summary(fit1)
#> Loading required package: lmerTest
#> Warning: package 'lmerTest' was built under R version 4.0.3
#> Loading required package: lme4
#> Loading required package: Matrix
#>
#> Attaching package: 'lmerTest'
#> The following object is masked from 'package:lme4':
#>
#> lmer
#> The following object is masked from 'package:stats':
#>
#> step
#> Linear mixed model fit by REML. t-tests use Satterthwaite's method [
#> lmerModLmerTest]
#> Formula: performance ~ engagement + month + complexity + traineePGY +
#> (1 | subjectID) + (1 | procName) + (1 | programID) + (1 | raterID)
#> Data: df_all
#>
#> REML criterion at convergence: 36804.3
#>
#> Scaled residuals:
#> Min 1Q Median 3Q Max
#> -5.6080 -0.5813 0.0271 0.5987 4.0138
#>
#> Random effects:
#> Groups Name Variance Std.Dev.
#> subjectID (Intercept) 0.036009 0.1898
#> raterID (Intercept) 0.113010 0.3362
#> procName (Intercept) 0.070083 0.2647
#> programID (Intercept) 0.007396 0.0860
#> Residual 0.246754 0.4967
#> Number of obs: 22762, groups:
#> subjectID, 1510; raterID, 1242; procName, 656; programID, 70
#>
#> Fixed effects:
#> Estimate Std. Error df t value Pr(>|t|)
#> (Intercept) 2.550e+00 3.148e-02 3.440e+02 81.008 < 2e-16 ***
#> engagement 1.998e-02 1.337e-02 4.473e+03 1.494 0.135194
#> month2 5.849e-02 1.685e-02 2.211e+04 3.472 0.000518 ***
#> month3 3.123e-02 1.782e-02 2.184e+04 1.753 0.079690 .
#> month4 9.933e-02 1.731e-02 2.192e+04 5.739 9.65e-09 ***
#> month5 1.206e-01 1.731e-02 2.174e+04 6.969 3.29e-12 ***
#> month6 1.200e-01 1.724e-02 2.176e+04 6.957 3.59e-12 ***
#> month7 1.322e-01 1.833e-02 2.171e+04 7.215 5.59e-13 ***
#> month8 1.477e-01 2.009e-02 2.147e+04 7.349 2.07e-13 ***
#> month9 2.119e-01 1.901e-02 2.119e+04 11.151 < 2e-16 ***
#> month10 2.140e-01 2.124e-02 2.127e+04 10.072 < 2e-16 ***
#> month11 2.177e-01 1.953e-02 2.082e+04 11.145 < 2e-16 ***
#> month12 2.353e-01 1.863e-02 2.159e+04 12.628 < 2e-16 ***
#> complexity1 1.048e-01 1.105e-02 2.191e+04 9.484 < 2e-16 ***
#> complexity3 -1.112e-01 9.027e-03 2.175e+04 -12.318 < 2e-16 ***
#> traineePGY2 3.066e-01 1.853e-02 1.022e+04 16.549 < 2e-16 ***
#> traineePGY3 6.102e-01 1.847e-02 6.576e+03 33.040 < 2e-16 ***
#> traineePGY4 9.953e-01 1.950e-02 4.881e+03 51.052 < 2e-16 ***
#> traineePGY5 1.316e+00 2.054e-02 4.020e+03 64.041 < 2e-16 ***
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#> Correlation matrix not shown by default, as p = 19 > 12.
#> Use print(x, correlation=TRUE) or
#> vcov(x) if you need it
summary(fit2)
#> Linear mixed model fit by REML. t-tests use Satterthwaite's method [
#> lmerModLmerTest]
#> Formula: autonomy ~ engagement + month + complexity + traineePGY + (1 |
#> subjectID) + (1 | procName) + (1 | programID) + (1 | raterID)
#> Data: df_all
#>
#> REML criterion at convergence: 47071.6
#>
#> Scaled residuals:
#> Min 1Q Median 3Q Max
#> -4.5331 -0.6233 -0.0097 0.6493 3.7852
#>
#> Random effects:
#> Groups Name Variance Std.Dev.
#> subjectID (Intercept) 0.035518 0.1885
#> raterID (Intercept) 0.102290 0.3198
#> procName (Intercept) 0.144117 0.3796
#> programID (Intercept) 0.002693 0.0519
#> Residual 0.345925 0.5882
#> Number of obs: 24371, groups:
#> subjectID, 1539; raterID, 1269; procName, 666; programID, 71
#>
#> Fixed effects:
#> Estimate Std. Error df t value Pr(>|t|)
#> (Intercept) 1.713e+00 3.324e-02 1.060e+03 51.546 < 2e-16 ***
#> engagement 6.217e-02 1.449e-02 3.731e+03 4.291 1.82e-05 ***
#> month2 5.274e-02 1.903e-02 2.361e+04 2.771 0.0056 **
#> month3 1.362e-02 2.008e-02 2.286e+04 0.678 0.4977
#> month4 1.073e-01 1.952e-02 2.301e+04 5.494 3.98e-08 ***
#> month5 1.046e-01 1.949e-02 2.272e+04 5.365 8.16e-08 ***
#> month6 8.062e-02 1.941e-02 2.266e+04 4.153 3.30e-05 ***
#> month7 1.083e-01 2.071e-02 2.270e+04 5.229 1.72e-07 ***
#> month8 1.718e-01 2.270e-02 2.240e+04 7.567 3.98e-14 ***
#> month9 1.702e-01 2.141e-02 2.191e+04 7.948 1.98e-15 ***
#> month10 2.117e-01 2.393e-02 2.217e+04 8.845 < 2e-16 ***
#> month11 1.834e-01 2.203e-02 2.167e+04 8.328 < 2e-16 ***
#> month12 2.051e-01 2.096e-02 2.271e+04 9.784 < 2e-16 ***
#> complexity1 1.014e-01 1.254e-02 2.371e+04 8.083 6.60e-16 ***
#> complexity3 -3.099e-01 1.019e-02 2.354e+04 -30.421 < 2e-16 ***
#> traineePGY2 3.556e-01 1.966e-02 9.510e+03 18.085 < 2e-16 ***
#> traineePGY3 7.135e-01 1.951e-02 5.766e+03 36.569 < 2e-16 ***
#> traineePGY4 1.035e+00 2.068e-02 4.375e+03 50.072 < 2e-16 ***
#> traineePGY5 1.352e+00 2.174e-02 3.711e+03 62.185 < 2e-16 ***
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#> Correlation matrix not shown by default, as p = 19 > 12.
#> Use print(x, correlation=TRUE) or
#> vcov(x) if you need it

| Characteristic | exp(Beta) | 95% CI¹ | p-value |
|---|---|---|---|
| Engagement | 1.02 | 0.99, 1.05 | 0.14 |
| Academic month | | | <0.001 |
| 1 | — | — | |
| 2 | 1.06 | 1.03, 1.10 | |
| 3 | 1.03 | 1.00, 1.07 | |
| 4 | 1.10 | 1.07, 1.14 | |
| 5 | 1.13 | 1.09, 1.17 | |
| 6 | 1.13 | 1.09, 1.17 | |
| 7 | 1.14 | 1.10, 1.18 | |
| 8 | 1.16 | 1.11, 1.21 | |
| 9 | 1.24 | 1.19, 1.28 | |
| 10 | 1.24 | 1.19, 1.29 | |
| 11 | 1.24 | 1.20, 1.29 | |
| 12 | 1.27 | 1.22, 1.31 | |
| Complexity | | | <0.001 |
| 2 | — | — | |
| 1 | 1.11 | 1.09, 1.13 | |
| 3 | 0.89 | 0.88, 0.91 | |
| Trainee PGY | | | <0.001 |
| 1 | — | — | |
| 2 | 1.36 | 1.31, 1.41 | |
| 3 | 1.84 | 1.78, 1.91 | |
| 4 | 2.71 | 2.60, 2.81 | |
| 5 | 3.73 | 3.58, 3.88 | |

¹ CI = Confidence Interval
| Characteristic | exp(Beta) | 95% CI¹ | p-value |
|---|---|---|---|
| Engagement | 1.06 | 1.03, 1.09 | <0.001 |
| Academic month | | | <0.001 |
| 1 | — | — | |
| 2 | 1.05 | 1.02, 1.09 | |
| 3 | 1.01 | 0.97, 1.05 | |
| 4 | 1.11 | 1.07, 1.16 | |
| 5 | 1.11 | 1.07, 1.15 | |
| 6 | 1.08 | 1.04, 1.13 | |
| 7 | 1.11 | 1.07, 1.16 | |
| 8 | 1.19 | 1.14, 1.24 | |
| 9 | 1.19 | 1.14, 1.24 | |
| 10 | 1.24 | 1.18, 1.30 | |
| 11 | 1.20 | 1.15, 1.25 | |
| 12 | 1.23 | 1.18, 1.28 | |
| Complexity | | | <0.001 |
| 2 | — | — | |
| 1 | 1.11 | 1.08, 1.13 | |
| 3 | 0.73 | 0.72, 0.75 | |
| Trainee PGY | | | <0.001 |
| 1 | — | — | |
| 2 | 1.43 | 1.37, 1.48 | |
| 3 | 2.04 | 1.96, 2.12 | |
| 4 | 2.82 | 2.70, 2.93 | |
| 5 | 3.87 | 3.70, 4.03 | |

¹ CI = Confidence Interval