3.1 Primary Analysis
The full interaction model revealed significant variation in age-tolerance relationships across ranks (interaction p = 0.029). Full professors showed a negative age-tolerance association (β = -0.085, SE = 0.038, p = 0.067), while assistant professors demonstrated no significant relationship (β = -0.013, SE = 0.029, p = 0.658). Associate professors displayed a slight positive but non-significant trend (β = 0.017, SE = 0.042, p = 0.475). The model explained 51.1% of variance in tolerance scores (adjusted R² = 0.409).
library(erikmisc)
library(tidyverse)
ggplot2::theme_set(ggplot2::theme_bw()) # set theme_bw for all plots# First, download the data to your computer,
# save in the same folder as this qmd file.
# read the data
dat_tolerate <-
read_csv("ADA2_CL_12_tolerate.csv") |>
mutate(
# set 3="Asst" as baseline level
rank = factor(rank) |> relevel(3)
, id = 1:n()
)Rows: 30 Columns: 3
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
dbl (3): score, age, rank
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
str(dat_tolerate)tibble [30 × 4] (S3: tbl_df/tbl/data.frame)
$ score: num [1:30] 3.03 4.31 5.09 3.71 5.29 2.7 2.7 4.02 5.52 4.62 ...
$ age : num [1:30] 65 47 49 41 40 61 52 45 41 39 ...
$ rank : Factor w/ 3 levels "3","1","2": 2 2 2 2 2 2 2 2 2 2 ...
$ id : int [1:30] 1 2 3 4 5 6 7 8 9 10 ...
`geom_smooth()` using formula = 'y ~ x'
lm_s_a_r_ar <-
lm(
score ~ age * rank
, data = dat_tolerate
)# plot diagnostics
e_plot_lm_diagnostics(lm_s_a_r_ar)Non-constant Variance Score Test
Variance formula: ~ fitted.values
Chisquare = 0.05251352, Df = 1, p = 0.81875
there are higher-order terms (interactions) in this model
consider setting type = 'predictor'; see ?vif
Warning in e_plot_lm_diagnostics(lm_s_a_r_ar): Note: Collinearity plot
unreliable for predictors that also have interactions in the model.
library(car)Loading required package: carData
Attaching package: 'car'
The following object is masked from 'package:purrr':
some
The following object is masked from 'package:dplyr':
recode
Anova(aov(lm_s_a_r_ar), type=3)Anova Table (Type III tests)
Response: score
Sum Sq Df F value Pr(>F)
(Intercept) 12.3528 1 30.3672 1.148e-05 ***
age 0.0817 1 0.2009 0.65802
rank 2.6243 2 3.2257 0.05744 .
age:rank 3.3610 2 4.1312 0.02872 *
Residuals 9.7628 24
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(lm_s_a_r_ar)
Call:
lm(formula = score ~ age * rank, data = dat_tolerate)
Residuals:
Min 1Q Median 3Q Max
-1.34746 -0.28793 0.01405 0.36653 1.07669
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 5.42706 0.98483 5.511 1.15e-05 ***
age -0.01321 0.02948 -0.448 0.6580
rank1 2.78490 1.51591 1.837 0.0786 .
rank2 -1.22343 1.50993 -0.810 0.4258
age:rank1 -0.07247 0.03779 -1.918 0.0671 .
age:rank2 0.03022 0.04165 0.726 0.4751
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 0.6378 on 24 degrees of freedom
Multiple R-squared: 0.5112, Adjusted R-squared: 0.4093
F-statistic: 5.02 on 5 and 24 DF, p-value: 0.002748
# exclude observation 7 from tolerate7 dataset
dat_tolerate7 <-
dat_tolerate |>
slice(-7)
lm7_s_a_r_ar <-
lm(
score ~ age * rank
, data = dat_tolerate7
)
library(car)
Anova(aov(lm7_s_a_r_ar), type=3)Anova Table (Type III tests)
Response: score
Sum Sq Df F value Pr(>F)
(Intercept) 12.3528 1 33.4564 6.821e-06 ***
age 0.0817 1 0.2213 0.64245
rank 2.3381 2 3.1662 0.06100 .
age:rank 2.8667 2 3.8821 0.03526 *
Residuals 8.4921 23
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(lm7_s_a_r_ar)
Call:
lm(formula = score ~ age * rank, data = dat_tolerate7)
Residuals:
Min 1Q Median 3Q Max
-1.34746 -0.31099 0.01162 0.30310 0.94978
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 5.42706 0.93827 5.784 6.82e-06 ***
age -0.01321 0.02808 -0.470 0.6425
rank1 2.58793 1.44812 1.787 0.0871 .
rank2 -1.22343 1.43853 -0.850 0.4038
age:rank1 -0.06586 0.03618 -1.821 0.0817 .
age:rank2 0.03022 0.03968 0.762 0.4540
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 0.6076 on 23 degrees of freedom
Multiple R-squared: 0.4706, Adjusted R-squared: 0.3555
F-statistic: 4.088 on 5 and 23 DF, p-value: 0.0084
# full data set
coef(lm_s_a_r_ar) |> round(4)(Intercept) age rank1 rank2 age:rank1 age:rank2
5.4271 -0.0132 2.7849 -1.2234 -0.0725 0.0302
# without obs 7
coef(lm7_s_a_r_ar) |> round(4)(Intercept) age rank1 rank2 age:rank1 age:rank2
5.4271 -0.0132 2.5879 -1.2234 -0.0659 0.0302
# first, find the order of the coefficients
coef(lm_s_a_r_ar)(Intercept) age rank1 rank2 age:rank1 age:rank2
5.42706473 -0.01321299 2.78490230 -1.22343359 -0.07247382 0.03022001
library(aod) # for wald.test()
## H0: Slope of Rank 1 = Rank 3 (similar to summary table above)
mR <-
rbind(
c(0, 0, 0, 0, 1, 0)
) |>
as.matrix()
vR <- c(0)
test_wald <-
wald.test(
b = coef(lm_s_a_r_ar)
, Sigma = vcov(lm_s_a_r_ar)
, L = mR
, H0 = vR
)
test_waldWald test:
----------
Chi-squared test:
X2 = 3.7, df = 1, P(> X2) = 0.055
## H0: Slope of Rank 2 = Rank 3 (similar to summary table above)
mR <-
rbind(
c(0, 0, 0, 0, 0, 1)
) |>
as.matrix()
vR <- c(0)
test_wald <-
wald.test(
b = coef(lm_s_a_r_ar)
, Sigma = vcov(lm_s_a_r_ar)
, L = mR
, H0 = vR
)
test_waldWald test:
----------
Chi-squared test:
X2 = 0.53, df = 1, P(> X2) = 0.47
## H0: Slope of Rank 1 = Rank 2 (not in summary table above)
mR <-
rbind(
c(0, 0, 0, 0, 1, -1)
) |>
as.matrix()
vR <- c(0)
test_wald <-
wald.test(
b = coef(lm_s_a_r_ar)
, Sigma = vcov(lm_s_a_r_ar)
, L = mR
, H0 = vR
)
test_waldWald test:
----------
Chi-squared test:
X2 = 7.4, df = 1, P(> X2) = 0.0065
# first, find the order of the coefficients
coef(lm_s_a_r_ar)(Intercept) age rank1 rank2 age:rank1 age:rank2
5.42706473 -0.01321299 2.78490230 -1.22343359 -0.07247382 0.03022001
library(aod) # for wald.test()
## H0: Line of Rank 1 = Rank 3
mR <-
rbind(
c(0, 0, 1, 0, 0, 0)
, c(0, 0, 0, 0, 1, 0)
) |>
as.matrix()
vR <- c(0, 0)
test_wald <-
wald.test(
b = coef(lm_s_a_r_ar)
, Sigma = vcov(lm_s_a_r_ar)
, L = mR
, H0 = vR
)
test_waldWald test:
----------
Chi-squared test:
X2 = 3.7, df = 2, P(> X2) = 0.16
## H0: Line of Rank 1 = Rank 2
mR1_2 <- rbind(
c(0, 0, 0, 1, 0, 0), # Coefficients for Rank 1
c(0, 0, 0, 0, 0, 1) # Coefficients for Rank 2
) |>
as.matrix()
vR1_2 <- c(0, 0)
test_wald_1_2 <- wald.test(
b = coef(lm_s_a_r_ar),
Sigma = vcov(lm_s_a_r_ar),
L = mR1_2,
H0 = vR1_2
)
## H0: Line of Rank 2 = Rank 3
mR2_3 <- rbind(
c(0, 0, 1, -1, 0, 0), # Coefficients for Rank 2
c(0, 0, 0, 0, 1, -1) # Coefficients for Rank 3
) |>
as.matrix()
vR2_3 <- c(0, 0)
test_wald_2_3 <- wald.test(
b = coef(lm_s_a_r_ar),
Sigma = vcov(lm_s_a_r_ar),
L = mR2_3,
H0 = vR2_3
)
# Print results
test_wald_1_2Wald test:
----------
Chi-squared test:
X2 = 0.77, df = 2, P(> X2) = 0.68
test_wald_2_3Wald test:
----------
Chi-squared test:
X2 = 8.3, df = 2, P(> X2) = 0.016
test_wald_2_3$result$chi2
chi2 df P
8.28513398 2.00000000 0.01588203
library(ggplot2)
library(aod) # For wald.test()
library(car) # For Anova()
library(gridExtra)
Attaching package: 'gridExtra'
The following object is masked from 'package:dplyr':
combine
# Drop observations where age > 50
tolerate_filtered <- dat_tolerate[dat_tolerate$age <= 50, ]
# Fit the full model with interaction
lm_s_a_r_ar_filtered <- lm(score ~ age * rank, data = tolerate_filtered)
summary(lm_s_a_r_ar_filtered)
Call:
lm(formula = score ~ age * rank, data = tolerate_filtered)
Residuals:
Min 1Q Median 3Q Max
-1.34746 -0.27953 -0.05493 0.39612 0.86896
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 5.42706 0.94613 5.736 1.08e-05 ***
age -0.01321 0.02832 -0.467 0.646
rank1 0.02133 2.96869 0.007 0.994
rank2 -1.22343 1.45059 -0.843 0.409
age:rank1 -0.00526 0.07090 -0.074 0.942
age:rank2 0.03022 0.04001 0.755 0.458
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 0.6127 on 21 degrees of freedom
Multiple R-squared: 0.08518, Adjusted R-squared: -0.1326
F-statistic: 0.3911 on 5 and 21 DF, p-value: 0.8493
Anova(lm_s_a_r_ar_filtered, type=3)Anova Table (Type III tests)
Response: score
Sum Sq Df F value Pr(>F)
(Intercept) 12.3528 1 32.9025 1.079e-05 ***
age 0.0817 1 0.2177 0.6456
rank 0.2801 2 0.3731 0.6931
age:rank 0.2480 2 0.3303 0.7224
Residuals 7.8842 21
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# plot diagnostics
e_plot_lm_diagnostics(lm_s_a_r_ar_filtered)Non-constant Variance Score Test
Variance formula: ~ fitted.values
Chisquare = 0.2436666, Df = 1, p = 0.62157
there are higher-order terms (interactions) in this model
consider setting type = 'predictor'; see ?vif
Warning in e_plot_lm_diagnostics(lm_s_a_r_ar_filtered): Note: Collinearity plot
unreliable for predictors that also have interactions in the model.
library(car)
# Drop observations where age > 50
tolerate_filtered <- dat_tolerate[dat_tolerate$age <= 50, ]
# Fit the full model without interaction
lm_s_a_r_ar_filtered_noint <- lm(score ~ age + rank, data = tolerate_filtered)
summary(lm_s_a_r_ar_filtered_noint)
Call:
lm(formula = score ~ age + rank, data = tolerate_filtered)
Residuals:
Min 1Q Median 3Q Max
-1.32472 -0.36970 -0.00364 0.35972 0.86892
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 4.9896329 0.6351915 7.855 5.86e-08 ***
age 0.0001641 0.0185542 0.009 0.993
rank1 -0.3452854 0.3512975 -0.983 0.336
rank2 -0.1409191 0.2855001 -0.494 0.626
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 0.5946 on 23 degrees of freedom
Multiple R-squared: 0.0564, Adjusted R-squared: -0.06668
F-statistic: 0.4583 on 3 and 23 DF, p-value: 0.7141
Anova(lm_s_a_r_ar_filtered_noint, type=3)Anova Table (Type III tests)
Response: score
Sum Sq Df F value Pr(>F)
(Intercept) 21.8175 1 61.7061 5.858e-08 ***
age 0.0000 1 0.0001 0.9930
rank 0.3429 2 0.4848 0.6219
Residuals 8.1322 23
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# plot diagnostics
e_plot_lm_diagnostics(lm_s_a_r_ar_filtered_noint)Non-constant Variance Score Test
Variance formula: ~ fitted.values
Chisquare = 0.06355161, Df = 1, p = 0.80097
Warning in e_plot_lm_diagnostics(lm_s_a_r_ar_filtered_noint): Note:
Collinearity plot unreliable for predictors that also have interactions in the
model.
library(car)
# Drop observations where age > 50
tolerate_filtered <- dat_tolerate[dat_tolerate$age <= 50, ]
# Fit the full model with rank main effect
lm_s_a_r_ar_filtered_main <- lm(score ~ rank, data = tolerate_filtered)
summary(lm_s_a_r_ar_filtered_main)
Call:
lm(formula = score ~ rank, data = tolerate_filtered)
Residuals:
Min 1Q Median 3Q Max
-1.3250 -0.3700 -0.0050 0.3600 0.8686
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 4.9950 0.1841 27.135 <2e-16 ***
rank1 -0.3436 0.2869 -1.198 0.243
rank2 -0.1400 0.2603 -0.538 0.596
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 0.5821 on 24 degrees of freedom
Multiple R-squared: 0.0564, Adjusted R-squared: -0.02223
F-statistic: 0.7172 on 2 and 24 DF, p-value: 0.4983
Anova(lm_s_a_r_ar_filtered_main, type=3)Anova Table (Type III tests)
Response: score
Sum Sq Df F value Pr(>F)
(Intercept) 249.500 1 736.3341 <2e-16 ***
rank 0.486 2 0.7172 0.4983
Residuals 8.132 24
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# plot diagnostics
e_plot_lm_diagnostics(lm_s_a_r_ar_filtered_main)Non-constant Variance Score Test
Variance formula: ~ fitted.values
Chisquare = 0.06529092, Df = 1, p = 0.79832
Warning in e_plot_lm_diagnostics(lm_s_a_r_ar_filtered_main): Collinearity plot
only available with at least two predictor (x) variables.
library(car)
# Drop observations where age > 50
tolerate_filtered <- dat_tolerate[dat_tolerate$age <= 50, ]
# Fit the full model with no main effect
lm_s_a_r_ar_filtered_nomain <- lm(score ~ 1, data = tolerate_filtered)
summary(lm_s_a_r_ar_filtered_nomain)
Call:
lm(formula = score ~ 1, data = tolerate_filtered)
Residuals:
Min 1Q Median 3Q Max
-1.18407 -0.34907 -0.00407 0.36093 1.00593
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 4.8541 0.1108 43.81 <2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 0.5757 on 26 degrees of freedom
Anova(lm_s_a_r_ar_filtered_nomain, type=3)Anova Table (Type III tests)
Response: score
Sum Sq Df F value Pr(>F)
(Intercept) 636.17 1 1919.2 < 2.2e-16 ***
Residuals 8.62 26
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# plot diagnostics
e_plot_lm_diagnostics(lm_s_a_r_ar_filtered_nomain)dat_tolerate <-
dat_tolerate |>
mutate(
# indicator for Full vs (Assist & Assoc)
rankaa =
case_when(
rank %in% c(2, 3) ~ 0 # Assist & Assoc
, rank %in% c(1 ) ~ 1 # Full
)
, rankaa = factor(rankaa)
, rankaa = relevel(rankaa, "0")
)
str(dat_tolerate)tibble [30 × 5] (S3: tbl_df/tbl/data.frame)
$ score : num [1:30] 3.03 4.31 5.09 3.71 5.29 2.7 2.7 4.02 5.52 4.62 ...
$ age : num [1:30] 65 47 49 41 40 61 52 45 41 39 ...
$ rank : Factor w/ 3 levels "3","1","2": 2 2 2 2 2 2 2 2 2 2 ...
$ id : int [1:30] 1 2 3 4 5 6 7 8 9 10 ...
$ rankaa: Factor w/ 2 levels "0","1": 2 2 2 2 2 2 2 2 2 2 ...
coef(lm_s_a_r_ar)(Intercept) age rank1 rank2 age:rank1 age:rank2
5.42706473 -0.01321299 2.78490230 -1.22343359 -0.07247382 0.03022001
library(aod) # for wald.test()
# Typically, we are interested in testing whether individual parameters or
# set of parameters are all simultaneously equal to 0s
# However, any null hypothesis values can be included in the vector coef.test.values.
coef_test_values <-
rep(0, length(coef(lm_s_a_r_ar)))
library(aod) # for wald.test()
test_wald <-
wald.test(
b = coef(lm_s_a_r_ar) - coef_test_values
, Sigma = vcov(lm_s_a_r_ar)
, Terms = c(4, 6)
)
test_waldWald test:
----------
Chi-squared test:
X2 = 0.77, df = 2, P(> X2) = 0.68
library(ggplot2)
dat_tolerate <-
dat_tolerate |>
mutate(
# indicator for Full vs (Assist & Assoc)
rankaa =
case_when(
rank %in% c(2, 3) ~ 0 # Assist & Assoc
, rank %in% c(1 ) ~ 1 # Full
)
, rankaa = factor(rankaa)
, rankaa = relevel(rankaa, "0")
)
str(dat_tolerate)tibble [30 × 5] (S3: tbl_df/tbl/data.frame)
$ score : num [1:30] 3.03 4.31 5.09 3.71 5.29 2.7 2.7 4.02 5.52 4.62 ...
$ age : num [1:30] 65 47 49 41 40 61 52 45 41 39 ...
$ rank : Factor w/ 3 levels "3","1","2": 2 2 2 2 2 2 2 2 2 2 ...
$ id : int [1:30] 1 2 3 4 5 6 7 8 9 10 ...
$ rankaa: Factor w/ 2 levels "0","1": 2 2 2 2 2 2 2 2 2 2 ...
# Plot score vs. age with rankaa as color
ggplot(dat_tolerate, aes(x = age, y = score, colour = rankaa, label = rankaa)) +
geom_text(size = 4) +
expand_limits(x = 0, y = 8.5) +
labs(title = "Tolerance Score Data (Full vs. AA Ranks)") +
theme_bw() + geom_smooth(method = "lm", se = FALSE, linewidth = 1) +
theme(legend.position = "none")`geom_smooth()` using formula = 'y ~ x'
# Fit a linear model including interaction between age and rankaa
lm_full <- lm(score ~ age * rankaa, data = dat_tolerate)
summary(lm_full)
Call:
lm(formula = score ~ age * rankaa, data = dat_tolerate)
Residuals:
Min 1Q Median 3Q Max
-1.26367 -0.37032 -0.05807 0.33922 1.07669
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 4.993406 0.682148 7.320 8.97e-08 ***
age -0.001927 0.018811 -0.102 0.9192
rankaa1 3.218561 1.315436 2.447 0.0215 *
age:rankaa1 -0.083760 0.029768 -2.814 0.0092 **
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 0.6225 on 26 degrees of freedom
Multiple R-squared: 0.4956, Adjusted R-squared: 0.4374
F-statistic: 8.515 on 3 and 26 DF, p-value: 0.0004174
# Model fit
library(car)
Anova(lm_full, type = 3)Anova Table (Type III tests)
Response: score
Sum Sq Df F value Pr(>F)
(Intercept) 20.7626 1 53.5843 8.971e-08 ***
age 0.0041 1 0.0105 0.919197
rankaa 2.3197 1 5.9867 0.021486 *
age:rankaa 3.0678 1 7.9175 0.009203 **
Residuals 10.0744 26
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# plot diagnostics
e_plot_lm_diagnostics(lm_full)Non-constant Variance Score Test
Variance formula: ~ fitted.values
Chisquare = 0.1326644, Df = 1, p = 0.71569
there are higher-order terms (interactions) in this model
consider setting type = 'predictor'; see ?vif
Warning in e_plot_lm_diagnostics(lm_full): Note: Collinearity plot unreliable
for predictors that also have interactions in the model.