Two level (game-school) models with fixed effect and random effect predictors

Author

Jingyi Yang

1. Install Packages

library(readxl)
library("readr")
library("tidyverse")

Warning: package 'ggplot2' was built under R version 4.5.2

── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ purrr     1.0.4
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   4.0.1     ✔ tibble    3.2.1
✔ lubridate 1.9.4     ✔ tidyr     1.3.1
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(dplyr)
library(lme4)

Loading required package: Matrix

Attaching package: 'Matrix'

The following objects are masked from 'package:tidyr':

    expand, pack, unpack

library(lmerTest)

Warning: package 'lmerTest' was built under R version 4.5.2


Attaching package: 'lmerTest'

The following object is masked from 'package:lme4':

    lmer

The following object is masked from 'package:stats':

    step

library("effectsize")

Warning: package 'effectsize' was built under R version 4.5.2

2. Import the Data

The codes for “Import the Data” is the same as the codes for “Import the Data” given in the first report “Waste Management Project Update (Part I - exploratory data analysis - 12032025)”

2.1 Clean the data

# Select the columns
data_clean <- data %>% select(`Conference`, `School`, `Area Classification (0-Rural; 1-Urban)`, `Year`, `Tenure Year`, `In-Season_Game`, `S_Diversion`, `Attendance`, `Game Time`,`Game result (Win=1; Loss=0)`,`Athletic Dept Profit`, `Athletic Dept Total Expenses`, `Athletic Dept Total Revenues`) 
# Convert game time to a numerical variable and create a new column
data_clean$GameTime_numeric <- as.numeric(format(data_clean$`Game Time`, "%H")) + as.numeric(format(data_clean$`Game Time`, "%M"))/60 
# Avoid game time impacted by the computer system date
data_clean$`Game Time`=format(data_clean$`Game Time`, format = "%H:%M") 
# Convert the categorical variables to the factor data type
data_clean <- data_clean %>% mutate(`Game Time`= as.character(`Game Time`)) %>% mutate(`Area Classification (0-Rural; 1-Urban)`= as.character(`Area Classification (0-Rural; 1-Urban)`)) %>% mutate(`Attendance`= as.numeric(`Attendance`))

Warning: There was 1 warning in `mutate()`.
ℹ In argument: `Attendance = as.numeric(Attendance)`.
Caused by warning:
! NAs introduced by coercion

# Convert the categorical variables to the factor data type
cols_to_factor <- data_clean%>% select_if(is.character) %>% colnames() 
cols_to_factor

[1] "Conference"                            
[2] "School"                                
[3] "Area Classification (0-Rural; 1-Urban)"
[4] "Game Time"                             
[5] "Game result (Win=1; Loss=0)"

# Make sure there is no NA level 
data_clean <- data_clean %>% 
  mutate(`Game result (Win=1; Loss=0)` = na_if(`Game result (Win=1; Loss=0)`, "N/A")) %>%
          mutate(across(all_of(cols_to_factor), as.factor))

# Clean the game that is being cancelled
data_clean <- subset(data_clean, !is.na(`Game result (Win=1; Loss=0)`))

# Classify game time into four time slots
data_clean$GameTime_numeric_c_1 <- with(data_clean, ifelse(
  GameTime_numeric >= 9 &  GameTime_numeric < 12, 1,  # Morning
  ifelse(GameTime_numeric >= 12 & GameTime_numeric < 15.5, 2,   # Noon
  ifelse(GameTime_numeric >= 15.5 & GameTime_numeric < 19, 3,   # Afternoon
  ifelse(GameTime_numeric >= 19,  4,   # Evening
  NA)))                                
))

# Centralize game time by 12
data_clean$GameTime_numeric_c_2 <-  data_clean$GameTime_numeric-12

# Centralize in the in-season game to 0
data_clean$`In-Season_Game_Centered` <- with(data_clean,
  ifelse(`In-Season_Game` == 1, 0,
  ifelse(`In-Season_Game` == 2, 1,
  ifelse(`In-Season_Game` == 3, 2,
  ifelse(`In-Season_Game` == 4, 3,
  ifelse(`In-Season_Game` == 5, 4,
  ifelse(`In-Season_Game` == 6, 5,
  ifelse(`In-Season_Game` == 7, 6,
  ifelse(`In-Season_Game` == 8, 7,
  ifelse(`In-Season_Game` == 9, 8, NA)))))))))
)

# Centralize the tenure year to 0
data_clean$`Tenure Year Centered` <- with(data_clean,
  ifelse(`Tenure Year` == 1, 0,
  ifelse(`Tenure Year` == 2, 1,
  ifelse(`Tenure Year` == 3, 2,
  ifelse(`Tenure Year` == 4, 3,
  ifelse(`Tenure Year` == 5, 4,
  ifelse(`Tenure Year` == 6, 5,
  ifelse(`Tenure Year` == 7, 6,
  ifelse(`Tenure Year` == 8, 7,
  ifelse(`Tenure Year` == 9, 8,
  ifelse(`Tenure Year` == 10, 9,
  ifelse(`Tenure Year` == 11, 10,
  ifelse(`Tenure Year` == 12, 11,
  ifelse(`Tenure Year` == 13, 12,
  ifelse(`Tenure Year` == 14, 13,
  ifelse(`Tenure Year` == 15, 14,
  ifelse(`Tenure Year` == 16, 15,
  ifelse(`Tenure Year` == 17, 16,
  ifelse(`Tenure Year` == 18, 17,
  ifelse(`Tenure Year` == 19, 18,
  ifelse(`Tenure Year` == 20, 19,
         NA))))))))))))))))))))
)

 # Centralize the year to 0
data_clean$Year_Centered <- with(data_clean,
  ifelse(`Year` == 2003, 0,
  ifelse(`Year` == 2004, 1,
  ifelse(`Year` == 2005, 2,
  ifelse(`Year` == 2006, 3,
  ifelse(`Year` == 2007, 4,
  ifelse(`Year` == 2008, 5,
  ifelse(`Year` == 2009, 6,
  ifelse(`Year` == 2010, 7,
  ifelse(`Year` == 2011, 8,
  ifelse(`Year` == 2012, 9,
  ifelse(`Year` == 2013, 10,
  ifelse(`Year` == 2014, 11,
  ifelse(`Year` == 2015, 12,
  ifelse(`Year` == 2016, 13,
  ifelse(`Year` == 2017, 14,
  ifelse(`Year` == 2018, 15,
  ifelse(`Year` == 2019, 16,
  ifelse(`Year` == 2020, 17,
  ifelse(`Year` == 2021, 18,
  ifelse(`Year` == 2022, 19,
  ifelse(`Year` == 2023, 20,
  ifelse(`Year` == 2024, 21,
         NA))))))))))))))))))))))
)

# Rename the columns
data_clean <- data_clean %>% dplyr::rename(`conference`= `Conference`,
                                    `school`= `School`,
                                    `area_classification` = `Area Classification (0-Rural; 1-Urban)`,
                                    `year`= `Year`,
                                    `tenure_year` = `Tenure Year`,
                                    `s_game`= `In-Season_Game`,
                                    `s_diversion`= `S_Diversion`,
                                    `attendance`= `Attendance`,
                                    `game_time`= `Game Time`,
                                    `game_result`= `Game result (Win=1; Loss=0)`,
                                    `profit`= `Athletic Dept Profit`,
                                    `total_expenses`= `Athletic Dept Total Expenses`,
                                    `total_revenues`= `Athletic Dept Total Revenues`,
                                    `game_time_chars_c_1`= `GameTime_numeric_c_1`,
                                    `game_time_num_c_2`= `GameTime_numeric_c_2`,
                                    `s_game_c`= `In-Season_Game_Centered`,
                                    `tenure_year_c`= `Tenure Year Centered`,
                                    `year_c`= `Year_Centered`
                                    ) %>% select(- `GameTime_numeric`) 

# Change the variable into a factor variable
data_clean$game_time_chars_c_1 <-as.factor(data_clean$game_time_chars_c_1)

# The variable Attendance belongs to the game level (level 1). Use the Centering Within cluster (CWC) method to centralize the attendance variable, making each cluster have a mean of 0.

data_clean %<>%
  group_by(school) %>%
  mutate(attendance_mean_school = mean(attendance, na.rm = TRUE)) %>%
  mutate(attendance_cwc_school = attendance - attendance_mean_school) %>%
  ungroup()

# Rescale the attendance to make the variable have a mean of 0 and a standard deviation of 1 to solve the issue of different scales.

data_clean %<>%
  group_by(school) %>%
  mutate(attendance_mean_school = mean(attendance, na.rm = TRUE)) %>%
  mutate(attendance_school_z = (attendance - attendance_mean_school)/sd(attendance, na.rm = TRUE)) %>% 
  ungroup()

# The variable Attendance belongs to the game level (level 1). Use the Centering Within cluster (CWC) method to centralize the attendance variable, making each cluster have a mean of 0.

data_clean %<>%
  group_by(year) %>%
  mutate(attendance_mean_year = mean(attendance, na.rm = TRUE)) %>%
  mutate(attendance_cwc_year = attendance - attendance_mean_year) %>%
  ungroup()

# Rescale the attendance to make the variable have a mean of 0 and a standard deviation of 1 to solve the issue of different scales.

data_clean %<>%
  group_by(year) %>%
  mutate(attendance_mean_year = mean(attendance, na.rm = TRUE)) %>%
  mutate(attendance_year_z = (attendance - attendance_mean_year)/sd(attendance, na.rm = TRUE)) %>%
  ungroup()

# The variable total revenues belongs to the year level (level 2). Use the Centering Grand Mean (CGM) method to centralize the total revenues variable, making all individuals have a mean of 0.

data_clean %<>%
mutate(total_revenues_mean = mean(total_revenues, na.rm = TRUE)) %>%
mutate(total_revenues_cgm = total_revenues - total_revenues_mean)

# Rescale the attendance to make the variable have a mean of 0 and a standard deviation of 1 to solve the issue of different scales.

data_clean %<>%
mutate(total_revenues_mean = mean(total_revenues, na.rm = TRUE)) %>%
mutate(total_revenues_z = (total_revenues - total_revenues_mean)/sd(total_revenues,na.rm = TRUE))

str(data_clean)

tibble [1,390 × 27] (S3: tbl_df/tbl/data.frame)
 $ conference            : Factor w/ 5 levels "ACC","Big10",..: 4 4 4 4 4 4 4 4 4 4 ...
 $ school                : Factor w/ 31 levels "Arizona State",..: 1 1 1 1 1 1 1 1 1 1 ...
 $ area_classification   : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
 $ year                  : num [1:1390] 2015 2015 2015 2015 2015 ...
 $ tenure_year           : num [1:1390] 1 1 1 1 1 1 1 2 2 2 ...
 $ s_game                : num [1:1390] 1 2 3 4 5 6 7 1 2 3 ...
 $ s_diversion           : num [1:1390] 0.44 0.412 0.315 0.57 0.579 ...
 $ attendance            : num [1:1390] 46500 43310 61904 44157 56534 ...
 $ game_time             : Factor w/ 54 levels "09:00","10:00",..: 49 44 46 44 46 13 15 49 44 44 ...
 $ game_result           : Factor w/ 2 levels "0","1": 2 2 1 2 1 2 2 2 2 2 ...
 $ profit                : num [1:1390] 566524 566524 566524 566524 566524 ...
 $ total_expenses        : num [1:1390] 83873516 83873516 83873516 83873516 83873516 ...
 $ total_revenues        : num [1:1390] 84440040 84440040 84440040 84440040 84440040 ...
 $ game_time_chars_c_1   : Factor w/ 4 levels "1","2","3","4": 4 4 4 4 4 2 2 4 4 4 ...
 $ game_time_num_c_2     : num [1:1390] 8 7 7.5 7 7.5 1 1.5 8 7 7 ...
 $ s_game_c              : num [1:1390] 0 1 2 3 4 5 6 0 1 2 ...
 $ tenure_year_c         : num [1:1390] 0 0 0 0 0 0 0 1 1 1 ...
 $ year_c                : num [1:1390] 12 12 12 12 12 12 12 13 13 13 ...
 $ attendance_mean_school: num [1:1390] 50009 50009 50009 50009 50009 ...
 $ attendance_cwc_school : num [1:1390] -3509 -6699 11895 -5852 6525 ...
 $ attendance_school_z   : num [1:1390] -0.65 -1.24 2.2 -1.08 1.21 ...
 $ attendance_mean_year  : num [1:1390] 70003 70003 70003 70003 70003 ...
 $ attendance_cwc_year   : num [1:1390] -23503 -26693 -8099 -25846 -13469 ...
 $ attendance_year_z     : num [1:1390] -0.856 -0.972 -0.295 -0.941 -0.49 ...
 $ total_revenues_mean   : num [1:1390] 1.25e+08 1.25e+08 1.25e+08 1.25e+08 1.25e+08 ...
 $ total_revenues_cgm    : num [1:1390] -40621158 -40621158 -40621158 -40621158 -40621158 ...
 $ total_revenues_z      : num [1:1390] -0.833 -0.833 -0.833 -0.833 -0.833 ...

3. Two-level (game-school) model with fixed effect and random effect predictors

The following interpretation is considered valid only if the statistical assumptions of the regression model are reasonably satisfied, including linearity, normality of error/random effect terms, and constant variance of the error/random effect terms. These assumptions are assessed in the residual diagnostics section.

Random Slope Models

The model that treats tenure years as a random slope performs better.

The ICC increases noticeably in the tenure-year model (model_school_r_2).
In the attendance model (model_school_r_1), the random slope variance for attendance is extremely small (0.000872) and essentially zero correlation with the intercept. However, in the tenure-year model (model_school_r_2), the random slope variance for tenure year is measurable (0.001163) with a correlation of 0.20.
The standard error of the estimate of residual variance drops from 0.13007 in the attendance model (model_school_r_1) to 0.1105 in the tenure-year model (model_school_r_2). The estimates of the residual variance drop from 0.016917 in the attendance model (model_school_r_1) to 0.012213 in the tenure-year model (model_school_r_2).

3.1 Random Slopes for Attendance

model_school_r_1 <- lmer(
s_diversion ~ game_time_num_c_2 + attendance_school_z+ game_result + s_game_c + area_classification + tenure_year_c + total_revenues_z +conference + (1+attendance_school_z|school),
data = data_clean
)

summary(model_school_r_1)

Linear mixed model fit by REML. t-tests use Satterthwaite's method [
lmerModLmerTest]
Formula: s_diversion ~ game_time_num_c_2 + attendance_school_z + game_result +  
    s_game_c + area_classification + tenure_year_c + total_revenues_z +  
    conference + (1 + attendance_school_z | school)
   Data: data_clean

REML criterion at convergence: -1323.6

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-3.7533 -0.6092 -0.0483  0.5221  3.6153 

Random effects:
 Groups   Name                Variance Std.Dev. Corr
 school   (Intercept)         0.062837 0.25067      
          attendance_school_z 0.000872 0.02953  0.00
 Residual                     0.016917 0.13007      
Number of obs: 1245, groups:  school, 27

Fixed effects:
                       Estimate Std. Error         df t value Pr(>|t|)    
(Intercept)           5.718e-01  2.075e-01  2.122e+01   2.755   0.0118 *  
game_time_num_c_2    -7.037e-04  1.357e-03  1.207e+03  -0.519   0.6042    
attendance_school_z  -3.909e-03  7.172e-03  2.651e+01  -0.545   0.5903    
game_result1         -6.826e-04  8.872e-03  1.203e+03  -0.077   0.9387    
s_game_c             -3.757e-04  1.938e-03  1.204e+03  -0.194   0.8463    
area_classification1 -1.374e-01  1.642e-01  2.098e+01  -0.836   0.4124    
tenure_year_c         1.456e-02  1.846e-03  1.232e+03   7.884 6.92e-15 ***
total_revenues_z      5.817e-02  1.005e-02  1.232e+03   5.789 8.97e-09 ***
conferenceBig10      -1.397e-01  1.526e-01  2.103e+01  -0.915   0.3704    
conferenceBig12      -2.881e-01  2.185e-01  2.115e+01  -1.319   0.2014    
conferencePac12      -8.186e-02  1.830e-01  2.112e+01  -0.447   0.6592    
conferenceSEC        -2.351e-01  1.581e-01  2.114e+01  -1.487   0.1518    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Correlation of Fixed Effects:
            (Intr) g____2 attn__ gm_rs1 s_gm_c ar_cl1 tnr_y_ ttl_r_ cnfB10
gm_tm_nm__2 -0.024                                                        
attndnc_sc_ -0.004 -0.076                                                 
game_reslt1 -0.032  0.029  0.052                                          
s_game_c    -0.035  0.144 -0.023  0.169                                   
ar_clssfct1 -0.792  0.001  0.000 -0.006 -0.002                            
tenure_yr_c -0.055 -0.045  0.086  0.001  0.026 -0.001                     
totl_rvns_z  0.071 -0.002 -0.066 -0.058 -0.047 -0.014 -0.781              
confrncBg10 -0.673  0.004  0.003  0.003  0.001  0.216  0.033 -0.050       
confrncBg12 -0.354 -0.004  0.004  0.002  0.005  0.001  0.050 -0.070  0.478
confrncPc12 -0.594 -0.009 -0.002  0.006  0.000  0.226 -0.030  0.020  0.614
conferncSEC -0.486 -0.009  0.005  0.003  0.000  0.001  0.051 -0.060  0.659
            cnfB12 cnfP12
gm_tm_nm__2              
attndnc_sc_              
game_reslt1              
s_game_c                 
ar_clssfct1              
tenure_yr_c              
totl_rvns_z              
confrncBg10              
confrncBg12              
confrncPc12  0.394       
conferncSEC  0.462  0.546

performance::icc(model_school_r_1)

Warning: Random slopes not present as fixed effects. This artificially inflates
  the conditional random effect variances.
  Respecify the fixed effects structure of your model (add random slopes
  as fixed effects).

# Intraclass Correlation Coefficient

    Adjusted ICC: 0.790
  Unadjusted ICC: 0.668

plot(model_school_r_1, type=c("p","smooth"))

There is no strong curvature. The linearity assumption is mostly reasonable.
The variability of the residuals seems to be non-constant (along the horizontal line at 0), suggesting a violation of the equal variance assumption.

qqnorm(resid(model_school_r_1)); qqline(resid(model_school_r_1))

The QQ plot indicates that the distribution of the residuals appears to have heavier tails than the normal distribution, though it look symmetical.

qqnorm(ranef(model_school_r_1)$school[, "(Intercept)"]);qqline(ranef(model_school_r_1)$school[, "(Intercept)"])

The random intercept roughly follows the reference line, so it does appear approximately normally distributed.

qqnorm(ranef(model_school_r_1)$school[, "attendance_school_z"]);qqline(ranef(model_school_r_1)$school[, "attendance_school_z"])

The random slope, attendance, does not follow the reference line, so it is not approximately normally distributed.

mf <- model.frame(model_school_r_1)
res <- residuals(model_school_r_1)
par(mfrow=c(2,3))
plot(res ~ mf$game_time_num_c_2,
main = "Residuals vs Game Time (centered)",
xlab = "Game Time (centered)", ylab = "Residuals")
plot(res ~ mf$attendance_school_z,
main = "Residuals vs Attendance (Z-scoring)",
xlab = "Attendance (Z-scoring)", ylab = "Residuals")
plot(res ~ mf$total_revenues_z,
main = "Residuals vs Total Revenues",
xlab = "Total Revenues (Z-scoring)", ylab = "Residuals")
plot(res ~ mf$s_game_c,
main = "Residuals vs Season Game (centered)",
xlab = "Season Game (centered)", ylab = "Residuals")
plot(res ~ mf$tenure_year_c,
main = "Residuals vs Tenure (centered)",
xlab = "Tenure (centered)", ylab = "Residuals")
plot(res ~ mf$area_classification,
main = "Residuals vs Area (0/1)",
xlab = "Area (0/1)", ylab = "Residuals")

plot(res ~ mf$conference,
main = "Residuals vs Conference",
xlab = "Conference", ylab = "Residuals")

3.2 Random Slopes for tenure year

model_school_r_2 <- lmer(
s_diversion ~ game_time_num_c_2 + attendance_school_z+ game_result + s_game_c + area_classification + tenure_year_c + total_revenues_z +conference + (1+tenure_year_c|school),
data = data_clean
)

summary(model_school_r_2)

Linear mixed model fit by REML. t-tests use Satterthwaite's method [
lmerModLmerTest]
Formula: s_diversion ~ game_time_num_c_2 + attendance_school_z + game_result +  
    s_game_c + area_classification + tenure_year_c + total_revenues_z +  
    conference + (1 + tenure_year_c | school)
   Data: data_clean

REML criterion at convergence: -1684

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-4.3705 -0.5636 -0.0676  0.4748  3.9509 

Random effects:
 Groups   Name          Variance Std.Dev. Corr 
 school   (Intercept)   0.058179 0.2412        
          tenure_year_c 0.001163 0.0341   -0.20
 Residual               0.012213 0.1105        
Number of obs: 1245, groups:  school, 27

Fixed effects:
                       Estimate Std. Error         df t value Pr(>|t|)  
(Intercept)           5.225e-01  1.965e-01  2.087e+01   2.659   0.0147 *
game_time_num_c_2    -3.605e-04  1.150e-03  1.196e+03  -0.313   0.7540  
attendance_school_z   2.736e-03  3.417e-03  1.209e+03   0.801   0.4234  
game_result1          8.230e-03  7.594e-03  1.196e+03   1.084   0.2787  
s_game_c             -5.605e-04  1.637e-03  1.191e+03  -0.342   0.7321  
area_classification1 -7.620e-02  1.557e-01  2.072e+01  -0.490   0.6296  
tenure_year_c         7.593e-03  7.401e-03  2.509e+01   1.026   0.3148  
total_revenues_z      2.377e-02  9.252e-03  1.221e+03   2.569   0.0103 *
conferenceBig10      -1.402e-01  1.439e-01  2.038e+01  -0.974   0.3415  
conferenceBig12      -2.587e-01  2.064e-01  2.067e+01  -1.253   0.2241  
conferencePac12      -2.896e-03  1.775e-01  2.245e+01  -0.016   0.9871  
conferenceSEC        -2.301e-01  1.492e-01  2.049e+01  -1.543   0.1382  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Correlation of Fixed Effects:
            (Intr) g____2 attn__ gm_rs1 s_gm_c ar_cl1 tnr_y_ ttl_r_ cnfB10
gm_tm_nm__2 -0.021                                                        
attndnc_sc_ -0.010 -0.116                                                 
game_reslt1 -0.032  0.040  0.097                                          
s_game_c    -0.032  0.148 -0.046  0.169                                   
ar_clssfct1 -0.792  0.001  0.002 -0.001  0.000                            
tenure_yr_c -0.053 -0.014  0.049 -0.018  0.007 -0.011                     
totl_rvns_z  0.065 -0.004 -0.087 -0.036 -0.032 -0.018 -0.181              
confrncBg10 -0.672  0.005  0.010  0.005  0.001  0.217  0.008 -0.041       
confrncBg12 -0.350 -0.004  0.012  0.005  0.006  0.001  0.007 -0.044  0.475
confrncPc12 -0.593 -0.008  0.004  0.018  0.003  0.242 -0.043  0.013  0.603
conferncSEC -0.484 -0.008  0.005  0.002 -0.001  0.001  0.019 -0.052  0.658
            cnfB12 cnfP12
gm_tm_nm__2              
attndnc_sc_              
game_reslt1              
s_game_c                 
ar_clssfct1              
tenure_yr_c              
totl_rvns_z              
confrncBg10              
confrncBg12              
confrncPc12  0.384       
conferncSEC  0.459  0.531

performance::icc(model_school_r_2)

# Intraclass Correlation Coefficient

    Adjusted ICC: 0.891
  Unadjusted ICC: 0.824

plot(model_school_r_2, type=c("p","smooth"))

There is no strong curvature. The linearity assumption is mostly reasonable.
The variability of the residuals seems to be non-constant (along the horizontal line at 0), suggesting a violation of the equal variance assumption of the error term.

qqnorm(resid(model_school_r_2)); qqline(resid(model_school_r_2))

The QQ plot indicates that the distribution of the residuals appears to have heavier tails than the normal distribution, though it look symmetical.

qqnorm(ranef(model_school_r_2)$school[, "(Intercept)"]);qqline(ranef(model_school_r_2)$school[, "(Intercept)"])

The random intercept does not roughly follow the reference line, especially the head and tail, so it does not appear approximately normally distributed.

qqnorm(ranef(model_school_r_2)$school[, "tenure_year_c"]);qqline(ranef(model_school_r_2)$school[, "tenure_year_c"])

The random slope, tenure year, does not follow the reference line, so it is not approximately normally distributed.

mf <- model.frame(model_school_r_2)
res <- residuals(model_school_r_2)
par(mfrow=c(2,3))
plot(res ~ mf$game_time_num_c_2,
main = "Residuals vs Game Time (centered)",
xlab = "Game Time (centered)", ylab = "Residuals")
plot(res ~ mf$attendance_school_z,
main = "Residuals vs Attendance (Z-scoring)",
xlab = "Attendance (Z-scoring)", ylab = "Residuals")
plot(res ~ mf$total_revenues_z,
main = "Residuals vs Total Revenues",
xlab = "Total Revenues (Z-scoring)", ylab = "Residuals")
plot(res ~ mf$s_game_c,
main = "Residuals vs Season Game (centered)",
xlab = "Season Game (centered)", ylab = "Residuals")
plot(res ~ mf$tenure_year_c,
main = "Residuals vs Tenure (centered)",
xlab = "Tenure (centered)", ylab = "Residuals")
plot(res ~ mf$area_classification,
main = "Residuals vs Area (0/1)",
xlab = "Area (0/1)", ylab = "Residuals")

plot(res ~ mf$conference,
main = "Residuals vs Conference",
xlab = "Conference", ylab = "Residuals")

4. Two level (game-school) model with interaction effects

model_school_inter  <- lmer(
 s_diversion ~ 1 + game_time_num_c_2 + s_game_c + area_classification + attendance_school_z + game_result + tenure_year_c+ total_revenues_z+ conference+ total_revenues_z:tenure_year_c + attendance_school_z:tenure_year_c + game_result:tenure_year_c + area_classification:total_revenues_z
+ (1 + tenure_year_c | school), data = data_clean,control = lmerControl(optimizer = "bobyqa"),  REML = TRUE)

summary(model_school_inter)

Linear mixed model fit by REML. t-tests use Satterthwaite's method [
lmerModLmerTest]
Formula: 
s_diversion ~ 1 + game_time_num_c_2 + s_game_c + area_classification +  
    attendance_school_z + game_result + tenure_year_c + total_revenues_z +  
    conference + total_revenues_z:tenure_year_c + attendance_school_z:tenure_year_c +  
    game_result:tenure_year_c + area_classification:total_revenues_z +  
    (1 + tenure_year_c | school)
   Data: data_clean
Control: lmerControl(optimizer = "bobyqa")

REML criterion at convergence: -1690.2

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-4.4151 -0.5134 -0.0530  0.4790  3.8653 

Random effects:
 Groups   Name          Variance Std.Dev. Corr 
 school   (Intercept)   0.055266 0.2351        
          tenure_year_c 0.001069 0.0327   -0.16
 Residual               0.011824 0.1087        
Number of obs: 1245, groups:  school, 27

Fixed effects:
                                        Estimate Std. Error         df t value
(Intercept)                            7.733e-01  2.061e-01  2.634e+01   3.752
game_time_num_c_2                     -3.343e-04  1.136e-03  1.194e+03  -0.294
s_game_c                              -7.811e-04  1.612e-03  1.188e+03  -0.485
area_classification1                  -2.500e-01  1.682e-01  2.746e+01  -1.486
attendance_school_z                   -9.818e-03  5.406e-03  1.210e+03  -1.816
game_result1                          -9.246e-03  1.114e-02  1.196e+03  -0.830
tenure_year_c                         -5.239e-03  7.452e-03  2.956e+01  -0.703
total_revenues_z                       2.809e-01  8.375e-02  2.198e+02   3.354
conferenceBig10                       -1.817e-01  1.418e-01  2.054e+01  -1.281
conferenceBig12                       -3.538e-01  2.039e-01  2.101e+01  -1.736
conferencePac12                       -3.080e-03  1.749e-01  2.266e+01  -0.018
conferenceSEC                         -2.745e-01  1.469e-01  2.061e+01  -1.869
tenure_year_c:total_revenues_z        -6.171e-03  1.131e-03  1.216e+03  -5.454
attendance_school_z:tenure_year_c      1.507e-03  5.886e-04  1.203e+03   2.560
game_result1:tenure_year_c             2.956e-03  1.720e-03  1.198e+03   1.718
area_classification1:total_revenues_z -1.837e-01  8.398e-02  2.174e+02  -2.188
                                      Pr(>|t|)    
(Intercept)                           0.000876 ***
game_time_num_c_2                     0.768606    
s_game_c                              0.628022    
area_classification1                  0.148598    
attendance_school_z                   0.069580 .  
game_result1                          0.406789    
tenure_year_c                         0.487548    
total_revenues_z                      0.000937 ***
conferenceBig10                       0.214325    
conferenceBig12                       0.097278 .  
conferencePac12                       0.986106    
conferenceSEC                         0.075898 .  
tenure_year_c:total_revenues_z        5.95e-08 ***
attendance_school_z:tenure_year_c     0.010581 *  
game_result1:tenure_year_c            0.085985 .  
area_classification1:total_revenues_z 0.029764 *  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1


Correlation matrix not shown by default, as p = 16 > 12.
Use print(x, correlation=TRUE)  or
    vcov(x)        if you need it

performance::icc(model_school_inter)

# Intraclass Correlation Coefficient

    Adjusted ICC: 0.891
  Unadjusted ICC: 0.814

plot(model_school_inter, type=c("p","smooth"))

qqnorm(resid(model_school_inter)); qqline(resid(model_school_inter))

qqnorm(ranef(model_school_inter)$school[, "(Intercept)"]);qqline(ranef(model_school_inter)$school[, "(Intercept)"])

qqnorm(ranef(model_school_inter)$school[, "tenure_year_c"]);qqline(ranef(model_school_inter)$school[, "tenure_year_c"])

mf <- model.frame(model_school_inter)
res <- residuals(model_school_inter)
par(mfrow=c(2,3))
plot(res ~ mf$game_time_num_c_2,
main = "Residuals vs Game Time (centered)",
xlab = "Game Time (centered)", ylab = "Residuals")
plot(res ~ mf$attendance_school_z,
main = "Residuals vs Attendance (Z-scoring)",
xlab = "Attendance (Z-scoring)", ylab = "Residuals")
plot(res ~ mf$total_revenues_z,
main = "Residuals vs Total Revenues",
xlab = "Total Revenues (Z-scoring)", ylab = "Residuals")
plot(res ~ mf$s_game_c,
main = "Residuals vs Season Game (centered)",
xlab = "Season Game (centered)", ylab = "Residuals")
plot(res ~ mf$tenure_year_c,
main = "Residuals vs Tenure (centered)",
xlab = "Tenure (centered)", ylab = "Residuals")
plot(res ~ mf$area_classification,
main = "Residuals vs Area (0/1)",
xlab = "Area (0/1)", ylab = "Residuals")

plot(res ~ mf$conference,
main = "Residuals vs Conference",
xlab = "Conference", ylab = "Residuals")

5. Two level (game-school) models with interaction and nonlinear effects

m_school_polytenure_inter_r_2 <- lmer(
 s_diversion ~ 1 + game_time_num_c_2 + attendance_school_z + game_result + s_game_c + conference + area_classification + total_revenues_z + poly(tenure_year_c , 3) + poly(tenure_year_c , 3):conference + poly(tenure_year_c , 3):attendance_school_z + area_classification:total_revenues_z + poly(tenure_year_c , 3):total_revenues_z + (1 + poly(tenure_year_c , 2) | school), data = data_clean,  control = lmerControl(optimizer = "bobyqa"),  REML = TRUE )

summary(m_school_polytenure_inter_r_2)

Linear mixed model fit by REML. t-tests use Satterthwaite's method [
lmerModLmerTest]
Formula: 
s_diversion ~ 1 + game_time_num_c_2 + attendance_school_z + game_result +  
    s_game_c + conference + area_classification + total_revenues_z +  
    poly(tenure_year_c, 3) + poly(tenure_year_c, 3):conference +  
    poly(tenure_year_c, 3):attendance_school_z + area_classification:total_revenues_z +  
    poly(tenure_year_c, 3):total_revenues_z + (1 + poly(tenure_year_c,  
    2) | school)
   Data: data_clean
Control: lmerControl(optimizer = "bobyqa")

REML criterion at convergence: -2010.7

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-4.6874 -0.5130 -0.0258  0.4440  4.4753 

Random effects:
 Groups   Name                    Variance  Std.Dev. Corr     
 school   (Intercept)             9.824e-02  0.31343          
          poly(tenure_year_c, 2)1 2.254e+02 15.01350 0.68     
          poly(tenure_year_c, 2)2 3.508e+01  5.92272 0.56 0.92
 Residual                         9.466e-03  0.09729          
Number of obs: 1245, groups:  school, 27

Fixed effects:
                                              Estimate Std. Error         df
(Intercept)                                  6.299e-01  2.629e-01  2.806e+01
game_time_num_c_2                           -1.568e-05  1.023e-03  1.171e+03
attendance_school_z                         -1.743e-03  3.406e-03  1.182e+03
game_result1                                 5.038e-03  6.808e-03  1.173e+03
s_game_c                                     5.754e-05  1.446e-03  1.158e+03
conferenceBig10                              7.504e-02  2.307e-01  1.612e+01
conferenceBig12                             -9.299e-02  3.346e-01  1.450e+01
conferencePac12                              2.033e-01  2.700e-01  1.639e+01
conferenceSEC                               -2.905e-01  2.500e-01  1.722e+01
area_classification1                        -3.884e-01  1.743e-01  2.964e+01
total_revenues_z                             4.437e-01  1.149e-01  8.265e+01
poly(tenure_year_c, 3)1                     -1.476e+01  1.078e+01  1.120e+01
poly(tenure_year_c, 3)2                     -4.618e+00  4.608e+00  1.032e+01
poly(tenure_year_c, 3)3                      2.885e+00  7.507e-01  3.365e+02
conferenceBig10:poly(tenure_year_c, 3)1      1.245e+01  1.238e+01  1.030e+01
conferenceBig12:poly(tenure_year_c, 3)1      1.285e+01  1.780e+01  8.364e+00
conferencePac12:poly(tenure_year_c, 3)1      1.400e+01  1.450e+01  9.456e+00
conferenceSEC:poly(tenure_year_c, 3)1       -6.710e+00  1.377e+01  1.171e+01
conferenceBig10:poly(tenure_year_c, 3)2      2.721e+00  5.279e+00  9.761e+00
conferenceBig12:poly(tenure_year_c, 3)2      4.046e-01  7.640e+00  8.356e+00
conferencePac12:poly(tenure_year_c, 3)2      4.972e+00  6.207e+00  8.885e+00
conferenceSEC:poly(tenure_year_c, 3)2       -1.112e+01  6.107e+00  1.205e+01
conferenceBig10:poly(tenure_year_c, 3)3     -4.734e+00  8.938e-01  4.231e+02
conferenceBig12:poly(tenure_year_c, 3)3     -4.901e+00  2.121e+00  5.788e+01
conferencePac12:poly(tenure_year_c, 3)3     -5.402e+00  1.301e+00  1.535e+02
conferenceSEC:poly(tenure_year_c, 3)3       -9.048e+00  1.131e+00  3.861e+02
attendance_school_z:poly(tenure_year_c, 3)1  2.863e-01  1.028e-01  1.186e+03
attendance_school_z:poly(tenure_year_c, 3)2 -6.391e-02  1.024e-01  1.189e+03
attendance_school_z:poly(tenure_year_c, 3)3 -2.676e-01  1.107e-01  1.187e+03
area_classification1:total_revenues_z       -3.887e-01  1.157e-01  8.382e+01
total_revenues_z:poly(tenure_year_c, 3)1    -6.799e-01  3.237e-01  9.654e+02
total_revenues_z:poly(tenure_year_c, 3)2     6.859e-01  3.676e-01  5.410e+02
total_revenues_z:poly(tenure_year_c, 3)3    -4.072e-01  2.049e-01  6.200e+02
                                            t value Pr(>|t|)    
(Intercept)                                   2.396 0.023482 *  
game_time_num_c_2                            -0.015 0.987772    
attendance_school_z                          -0.512 0.609066    
game_result1                                  0.740 0.459428    
s_game_c                                      0.040 0.968261    
conferenceBig10                               0.325 0.749160    
conferenceBig12                              -0.278 0.785029    
conferencePac12                               0.753 0.462084    
conferenceSEC                                -1.162 0.261204    
area_classification1                         -2.228 0.033586 *  
total_revenues_z                              3.861 0.000223 ***
poly(tenure_year_c, 3)1                      -1.369 0.197843    
poly(tenure_year_c, 3)2                      -1.002 0.339261    
poly(tenure_year_c, 3)3                       3.843 0.000145 ***
conferenceBig10:poly(tenure_year_c, 3)1       1.005 0.337918    
conferenceBig12:poly(tenure_year_c, 3)1       0.722 0.490032    
conferencePac12:poly(tenure_year_c, 3)1       0.966 0.358333    
conferenceSEC:poly(tenure_year_c, 3)1        -0.487 0.635125    
conferenceBig10:poly(tenure_year_c, 3)2       0.515 0.617771    
conferenceBig12:poly(tenure_year_c, 3)2       0.053 0.959007    
conferencePac12:poly(tenure_year_c, 3)2       0.801 0.444011    
conferenceSEC:poly(tenure_year_c, 3)2        -1.822 0.093435 .  
conferenceBig10:poly(tenure_year_c, 3)3      -5.297 1.90e-07 ***
conferenceBig12:poly(tenure_year_c, 3)3      -2.311 0.024414 *  
conferencePac12:poly(tenure_year_c, 3)3      -4.151 5.48e-05 ***
conferenceSEC:poly(tenure_year_c, 3)3        -8.003 1.44e-14 ***
attendance_school_z:poly(tenure_year_c, 3)1   2.784 0.005451 ** 
attendance_school_z:poly(tenure_year_c, 3)2  -0.624 0.532806    
attendance_school_z:poly(tenure_year_c, 3)3  -2.416 0.015830 *  
area_classification1:total_revenues_z        -3.359 0.001179 ** 
total_revenues_z:poly(tenure_year_c, 3)1     -2.100 0.035968 *  
total_revenues_z:poly(tenure_year_c, 3)2      1.866 0.062613 .  
total_revenues_z:poly(tenure_year_c, 3)3     -1.987 0.047345 *  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1


Correlation matrix not shown by default, as p = 33 > 12.
Use print(x, correlation=TRUE)  or
    vcov(x)        if you need it

performance::icc(m_school_polytenure_inter_r_2)

# Intraclass Correlation Coefficient

    Adjusted ICC: 0.912
  Unadjusted ICC: 0.624

plot(m_school_polytenure_inter_r_2, type=c("p","smooth"))

qqnorm(resid(m_school_polytenure_inter_r_2)); qqline(resid(m_school_polytenure_inter_r_2))

qqnorm(ranef(m_school_polytenure_inter_r_2)$school[, 1]);qqline(ranef(m_school_polytenure_inter_r_2)$school[, 1])

qqnorm(ranef(m_school_polytenure_inter_r_2)$school[, 2]);qqline(ranef(m_school_polytenure_inter_r_2)$school[, 2])

qqnorm(ranef(m_school_polytenure_inter_r_2)$school[, 3]);qqline(ranef(m_school_polytenure_inter_r_2)$school[, 3])

mf <- model.frame(m_school_polytenure_inter_r_2)
res <- residuals(m_school_polytenure_inter_r_2)
par(mfrow=c(2,3))
plot(res ~ mf$game_time_num_c_2,
main = "Residuals vs Game Time (centered)",
xlab = "Game Time (centered)", ylab = "Residuals")
plot(res ~ mf$attendance_school_z,
main = "Residuals vs Attendance (Z-scoring)",
xlab = "Attendance (Z-scoring)", ylab = "Residuals")
plot(res ~ mf$total_revenues_z,
main = "Residuals vs Total Revenues",
xlab = "Total Revenues (Z-scoring)", ylab = "Residuals")
plot(res ~ mf$s_game_c,
main = "Residuals vs Season Game (centered)",
xlab = "Season Game (centered)", ylab = "Residuals")
plot(res ~ mf$area_classification,
main = "Residuals vs Area (0/1)",
xlab = "Area (0/1)", ylab = "Residuals")
plot(res ~ mf$conference,
main = "Residuals vs Conference",
xlab = "Conference", ylab = "Residuals")