Stadium_Waste_Analysis_9

Author

Jingyi Yang

Install Packages

library(readxl)
library("readr")
library("tidyverse")
Warning: package 'ggplot2' was built under R version 4.5.2
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ purrr     1.0.4
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   4.0.1     ✔ tibble    3.2.1
✔ lubridate 1.9.4     ✔ tidyr     1.3.1
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(dplyr)
library(lme4)
Loading required package: Matrix

Attaching package: 'Matrix'

The following objects are masked from 'package:tidyr':

    expand, pack, unpack
library(lmerTest)
Warning: package 'lmerTest' was built under R version 4.5.2

Attaching package: 'lmerTest'

The following object is masked from 'package:lme4':

    lmer

The following object is masked from 'package:stats':

    step
library("effectsize")
Warning: package 'effectsize' was built under R version 4.5.2

Import the Data

Clean the data

data_clean <- data %>% select(`Conference`, `School`, `Area Classification (0-Rural; 1-Urban)`, `Year`, `Tenure Year`, `In-Season_Game`, `S_Diversion`, `Attendance`, `Game Time`,`Game result (Win=1; Loss=0)`,`Athletic Dept Profit`, `Athletic Dept Total Expenses`, `Athletic Dept Total Revenues`) # Select the column
data_clean$GameTime_numeric <- as.numeric(format(data_clean$`Game Time`, "%H")) + as.numeric(format(data_clean$`Game Time`, "%M"))/60 # Convert game time to a numerical variable and create a new column
data_clean$`Game Time`=format(data_clean$`Game Time`, format = "%H:%M") # Avoid game time impacted by the computer system date
data_clean <- data_clean %>% mutate(`Game Time`= as.character(`Game Time`)) %>% mutate(`Area Classification (0-Rural; 1-Urban)`= as.character(`Area Classification (0-Rural; 1-Urban)`)) %>% mutate(`Attendance`= as.numeric(`Attendance`)) # Convert the categorical variables to the factor data type.
Warning: There was 1 warning in `mutate()`.
ℹ In argument: `Attendance = as.numeric(Attendance)`.
Caused by warning:
! NAs introduced by coercion
cols_to_factor <- data_clean%>% select_if(is.character) %>% colnames() # Convert the categorical variables to the factor data type.
cols_to_factor 
[1] "Conference"                            
[2] "School"                                
[3] "Area Classification (0-Rural; 1-Urban)"
[4] "Game Time"                             
[5] "Game result (Win=1; Loss=0)"           
 data_clean <- data_clean %>% 
  mutate(`Game result (Win=1; Loss=0)` = na_if(`Game result (Win=1; Loss=0)`, "N/A")) %>%
          mutate(across(all_of(cols_to_factor), as.factor)) # Make sure there is no NA level.
data_clean <- subset(data_clean, !is.na(`Game result (Win=1; Loss=0)`)) # Clean the game that is being cancelled.
data_clean$GameTime_numeric_c_1 <- with(data_clean, ifelse(
  GameTime_numeric >= 9 &  GameTime_numeric < 12, 1,  # Morning
  ifelse(GameTime_numeric >= 12 & GameTime_numeric < 15.5, 2,   # noon
  ifelse(GameTime_numeric >= 15.5 & GameTime_numeric < 19, 3,   # afternoon
  ifelse(GameTime_numeric >= 19,  4,   # evening
  NA)))                                
)) # Classify game time into four time slots.
data_clean$GameTime_numeric_c_2 <-  data_clean$GameTime_numeric-12 # Centralize game time by 12
data_clean$`In-Season_Game_Centered` <- with(data_clean,
  ifelse(`In-Season_Game` == 1, 0,
  ifelse(`In-Season_Game` == 2, 1,
  ifelse(`In-Season_Game` == 3, 2,
  ifelse(`In-Season_Game` == 4, 3,
  ifelse(`In-Season_Game` == 5, 4,
  ifelse(`In-Season_Game` == 6, 5,
  ifelse(`In-Season_Game` == 7, 6,
  ifelse(`In-Season_Game` == 8, 7,
  ifelse(`In-Season_Game` == 9, 8, NA)))))))))
) # Centralize in the in-season game to 0.
data_clean$`Tenure Year Centered` <- with(data_clean,
  ifelse(`Tenure Year` == 1, 0,
  ifelse(`Tenure Year` == 2, 1,
  ifelse(`Tenure Year` == 3, 2,
  ifelse(`Tenure Year` == 4, 3,
  ifelse(`Tenure Year` == 5, 4,
  ifelse(`Tenure Year` == 6, 5,
  ifelse(`Tenure Year` == 7, 6,
  ifelse(`Tenure Year` == 8, 7,
  ifelse(`Tenure Year` == 9, 8,
  ifelse(`Tenure Year` == 10, 9,
  ifelse(`Tenure Year` == 11, 10,
  ifelse(`Tenure Year` == 12, 11,
  ifelse(`Tenure Year` == 13, 12,
  ifelse(`Tenure Year` == 14, 13,
  ifelse(`Tenure Year` == 15, 14,
  ifelse(`Tenure Year` == 16, 15,
  ifelse(`Tenure Year` == 17, 16,
  ifelse(`Tenure Year` == 18, 17,
  ifelse(`Tenure Year` == 19, 18,
  ifelse(`Tenure Year` == 20, 19,
         NA))))))))))))))))))))
)  # Centralize the tenure year to 0.
data_clean$Year_Centered <- with(data_clean,
  ifelse(`Year` == 2003, 0,
  ifelse(`Year` == 2004, 1,
  ifelse(`Year` == 2005, 2,
  ifelse(`Year` == 2006, 3,
  ifelse(`Year` == 2007, 4,
  ifelse(`Year` == 2008, 5,
  ifelse(`Year` == 2009, 6,
  ifelse(`Year` == 2010, 7,
  ifelse(`Year` == 2011, 8,
  ifelse(`Year` == 2012, 9,
  ifelse(`Year` == 2013, 10,
  ifelse(`Year` == 2014, 11,
  ifelse(`Year` == 2015, 12,
  ifelse(`Year` == 2016, 13,
  ifelse(`Year` == 2017, 14,
  ifelse(`Year` == 2018, 15,
  ifelse(`Year` == 2019, 16,
  ifelse(`Year` == 2020, 17,
  ifelse(`Year` == 2021, 18,
  ifelse(`Year` == 2022, 19,
  ifelse(`Year` == 2023, 20,
  ifelse(`Year` == 2024, 21,
         NA))))))))))))))))))))))
) # Centralize the year to 0.
data_clean <- data_clean %>% dplyr::rename(`conference`= `Conference`,
                                    `school`= `School`,
                                    `area_classification` = `Area Classification (0-Rural; 1-Urban)`,
                                    `year`= `Year`,
                                    `tenure_year` = `Tenure Year`,
                                    `s_game`= `In-Season_Game`,
                                    `s_diversion`= `S_Diversion`,
                                    `attendance`= `Attendance`,
                                    `game_time`= `Game Time`,
                                    `game_result`= `Game result (Win=1; Loss=0)`,
                                    `profit`= `Athletic Dept Profit`,
                                    `total_expenses`= `Athletic Dept Total Expenses`,
                                    `total_revenues`= `Athletic Dept Total Revenues`,
                                    `game_time_chars_c_1`= `GameTime_numeric_c_1`,
                                    `game_time_num_c_2`= `GameTime_numeric_c_2`,
                                    `s_game_c`= `In-Season_Game_Centered`,
                                    `tenure_year_c`= `Tenure Year Centered`,
                                    `year_c`= `Year_Centered`
                                    ) %>% select(- `GameTime_numeric`) # Rename the columns.

data_clean$game_time_chars_c_1 <-as.factor(data_clean$game_time_chars_c_1) # Change the variable into a factor variable.
# The variable Attendance belongs to the game level (level 1). Use the Centering Within cluster (CWC) method to centralize the attendance variable, making each cluster have a mean of 0.

data_clean %<>%
  group_by(school) %>%
  mutate(attendance_mean_school = mean(attendance, na.rm = TRUE)) %>%
  mutate(attendance_cwc_school = attendance - attendance_mean_school) %>%
  ungroup()
# Rescale the attendance to make the variable have a mean of 0 and a standard deviation of 1 to solve the issue of different scales.

data_clean %<>%
  group_by(school) %>%
  mutate(attendance_mean_school = mean(attendance, na.rm = TRUE)) %>%
  mutate(attendance_school_z = (attendance - attendance_mean_school)/sd(attendance, na.rm = TRUE)) %>% 
  ungroup()
# The variable Attendance belongs to the game level (level 1). Use the Centering Within cluster (CWC) method to centralize the attendance variable, making each cluster have a mean of 0.

data_clean %<>%
  group_by(year) %>%
  mutate(attendance_mean_year = mean(attendance, na.rm = TRUE)) %>%
  mutate(attendance_cwc_year = attendance - attendance_mean_year) %>%
  ungroup()
# Rescale the attendance to make the variable have a mean of 0 and a standard deviation of 1 to solve the issue of different scales.

data_clean %<>%
  group_by(year) %>%
  mutate(attendance_mean_year = mean(attendance, na.rm = TRUE)) %>%
  mutate(attendance_year_z = (attendance - attendance_mean_year)/sd(attendance, na.rm = TRUE)) %>%
  ungroup()
# The variable total revenues belongs to the year level (level 2). Use the Centering Grand Mean (CGM) method to centralize the total revenues variable, making all individuals have a mean of 0.

data_clean %<>%
mutate(total_revenues_mean = mean(total_revenues, na.rm = TRUE)) %>%
mutate(total_revenues_cgm = total_revenues - total_revenues_mean)
# Rescale the attendance to make the variable have a mean of 0 and a standard deviation of 1 to solve the issue of different scales.

data_clean %<>%
mutate(total_revenues_mean = mean(total_revenues, na.rm = TRUE)) %>%
mutate(total_revenues_z = (total_revenues - total_revenues_mean)/sd(total_revenues,na.rm = TRUE))
str(data_clean)
tibble [1,390 × 27] (S3: tbl_df/tbl/data.frame)
 $ conference            : Factor w/ 5 levels "ACC","Big10",..: 4 4 4 4 4 4 4 4 4 4 ...
 $ school                : Factor w/ 31 levels "Arizona State",..: 1 1 1 1 1 1 1 1 1 1 ...
 $ area_classification   : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
 $ year                  : num [1:1390] 2015 2015 2015 2015 2015 ...
 $ tenure_year           : num [1:1390] 1 1 1 1 1 1 1 2 2 2 ...
 $ s_game                : num [1:1390] 1 2 3 4 5 6 7 1 2 3 ...
 $ s_diversion           : num [1:1390] 0.44 0.412 0.315 0.57 0.579 ...
 $ attendance            : num [1:1390] 46500 43310 61904 44157 56534 ...
 $ game_time             : Factor w/ 54 levels "09:00","10:00",..: 49 44 46 44 46 13 15 49 44 44 ...
 $ game_result           : Factor w/ 2 levels "0","1": 2 2 1 2 1 2 2 2 2 2 ...
 $ profit                : num [1:1390] 566524 566524 566524 566524 566524 ...
 $ total_expenses        : num [1:1390] 83873516 83873516 83873516 83873516 83873516 ...
 $ total_revenues        : num [1:1390] 84440040 84440040 84440040 84440040 84440040 ...
 $ game_time_chars_c_1   : Factor w/ 4 levels "1","2","3","4": 4 4 4 4 4 2 2 4 4 4 ...
 $ game_time_num_c_2     : num [1:1390] 8 7 7.5 7 7.5 1 1.5 8 7 7 ...
 $ s_game_c              : num [1:1390] 0 1 2 3 4 5 6 0 1 2 ...
 $ tenure_year_c         : num [1:1390] 0 0 0 0 0 0 0 1 1 1 ...
 $ year_c                : num [1:1390] 12 12 12 12 12 12 12 13 13 13 ...
 $ attendance_mean_school: num [1:1390] 50009 50009 50009 50009 50009 ...
 $ attendance_cwc_school : num [1:1390] -3509 -6699 11895 -5852 6525 ...
 $ attendance_school_z   : num [1:1390] -0.65 -1.24 2.2 -1.08 1.21 ...
 $ attendance_mean_year  : num [1:1390] 70003 70003 70003 70003 70003 ...
 $ attendance_cwc_year   : num [1:1390] -23503 -26693 -8099 -25846 -13469 ...
 $ attendance_year_z     : num [1:1390] -0.856 -0.972 -0.295 -0.941 -0.49 ...
 $ total_revenues_mean   : num [1:1390] 1.25e+08 1.25e+08 1.25e+08 1.25e+08 1.25e+08 ...
 $ total_revenues_cgm    : num [1:1390] -40621158 -40621158 -40621158 -40621158 -40621158 ...
 $ total_revenues_z      : num [1:1390] -0.833 -0.833 -0.833 -0.833 -0.833 ...

Two level Model

Random Slope Models

The model that treats tenure year as a random slope performs better.

Random Slopes for Attendance

model_school_r_1 <- lmer(
s_diversion ~ game_time_num_c_2 + attendance_school_z+ game_result + s_game_c + area_classification + tenure_year_c + total_revenues_z +conference + (1+attendance_school_z|school),
data = data_clean
)
summary(model_school_r_1)
Linear mixed model fit by REML. t-tests use Satterthwaite's method [
lmerModLmerTest]
Formula: s_diversion ~ game_time_num_c_2 + attendance_school_z + game_result +  
    s_game_c + area_classification + tenure_year_c + total_revenues_z +  
    conference + (1 + attendance_school_z | school)
   Data: data_clean

REML criterion at convergence: -1323.6

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-3.7533 -0.6092 -0.0483  0.5221  3.6153 

Random effects:
 Groups   Name                Variance Std.Dev. Corr
 school   (Intercept)         0.062837 0.25067      
          attendance_school_z 0.000872 0.02953  0.00
 Residual                     0.016917 0.13007      
Number of obs: 1245, groups:  school, 27

Fixed effects:
                       Estimate Std. Error         df t value Pr(>|t|)    
(Intercept)           5.718e-01  2.075e-01  2.122e+01   2.755   0.0118 *  
game_time_num_c_2    -7.037e-04  1.357e-03  1.207e+03  -0.519   0.6042    
attendance_school_z  -3.909e-03  7.172e-03  2.651e+01  -0.545   0.5903    
game_result1         -6.826e-04  8.872e-03  1.203e+03  -0.077   0.9387    
s_game_c             -3.757e-04  1.938e-03  1.204e+03  -0.194   0.8463    
area_classification1 -1.374e-01  1.642e-01  2.098e+01  -0.836   0.4124    
tenure_year_c         1.456e-02  1.846e-03  1.232e+03   7.884 6.92e-15 ***
total_revenues_z      5.817e-02  1.005e-02  1.232e+03   5.789 8.97e-09 ***
conferenceBig10      -1.397e-01  1.526e-01  2.103e+01  -0.915   0.3704    
conferenceBig12      -2.881e-01  2.185e-01  2.115e+01  -1.319   0.2014    
conferencePac12      -8.186e-02  1.830e-01  2.112e+01  -0.447   0.6592    
conferenceSEC        -2.351e-01  1.581e-01  2.114e+01  -1.487   0.1518    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Correlation of Fixed Effects:
            (Intr) g____2 attn__ gm_rs1 s_gm_c ar_cl1 tnr_y_ ttl_r_ cnfB10
gm_tm_nm__2 -0.024                                                        
attndnc_sc_ -0.004 -0.076                                                 
game_reslt1 -0.032  0.029  0.052                                          
s_game_c    -0.035  0.144 -0.023  0.169                                   
ar_clssfct1 -0.792  0.001  0.000 -0.006 -0.002                            
tenure_yr_c -0.055 -0.045  0.086  0.001  0.026 -0.001                     
totl_rvns_z  0.071 -0.002 -0.066 -0.058 -0.047 -0.014 -0.781              
confrncBg10 -0.673  0.004  0.003  0.003  0.001  0.216  0.033 -0.050       
confrncBg12 -0.354 -0.004  0.004  0.002  0.005  0.001  0.050 -0.070  0.478
confrncPc12 -0.594 -0.009 -0.002  0.006  0.000  0.226 -0.030  0.020  0.614
conferncSEC -0.486 -0.009  0.005  0.003  0.000  0.001  0.051 -0.060  0.659
            cnfB12 cnfP12
gm_tm_nm__2              
attndnc_sc_              
game_reslt1              
s_game_c                 
ar_clssfct1              
tenure_yr_c              
totl_rvns_z              
confrncBg10              
confrncBg12              
confrncPc12  0.394       
conferncSEC  0.462  0.546
performance::icc(model_school_r_1)
Warning: Random slopes not present as fixed effects. This artificially inflates
  the conditional random effect variances.
  Respecify the fixed effects structure of your model (add random slopes
  as fixed effects).
# Intraclass Correlation Coefficient

    Adjusted ICC: 0.790
  Unadjusted ICC: 0.668
plot(model_school_r_1, type=c("p","smooth"))

  1. There is no strong curvature. The linearity assumption is mostly reasonable.
  2. The vertical spread of points increases and decreases across the fitted values. Accordingly, the variance is not constant, which indicates heteroscedasticity.
plot(model_school_r_1,
     form = sqrt(abs(resid(.))) ~ fitted(.),
     type = c("p","smooth"))

The line is going down quickly, and the point is clustered around 0.2, which shows the variance is not constant and there is heteroscedasticity.

qqnorm(resid(model_school_r_1)); qqline(resid(model_school_r_1))

The head and tail are deviated, which shows non-normal residuals.

qqnorm(ranef(model_school_r_1)$school[, "(Intercept)"]);qqline(ranef(model_school_r_1)$school[, "(Intercept)"])

The random intercept roughly follows the reference line. There is a slight deviation, so the normality assumption is being satisfied.

mf <- model.frame(model_school_r_1)
res <- residuals(model_school_r_1)
par(mfrow=c(2,3))
plot(res ~ mf$game_time_num_c_2,
main = "Residuals vs Game Time (centered)",
xlab = "Game Time (centered)", ylab = "Residuals")
plot(res ~ mf$attendance_school_z,
main = "Residuals vs Attendance (Z-scoring)",
xlab = "Attendance (Z-scoring)", ylab = "Residuals")
plot(res ~ mf$total_revenues_z,
main = "Residuals vs Total Revenues",
xlab = "Total Revenues (Z-scoring)", ylab = "Residuals")
plot(res ~ mf$s_game_c,
main = "Residuals vs Season Game (centered)",
xlab = "Season Game (centered)", ylab = "Residuals")
plot(res ~ mf$tenure_year_c,
main = "Residuals vs Tenure (centered)",
xlab = "Tenure (centered)", ylab = "Residuals")
plot(res ~ mf$area_classification,
main = "Residuals vs Area (0/1)",
xlab = "Area (0/1)", ylab = "Residuals")

plot(res ~ mf$conference,
main = "Residuals vs Conference",
xlab = "Conference", ylab = "Residuals")

  1. Residuals vs Game Time (centered): No obvious pattern,the variance is constant, which indicates no heteroscedasticity.

  2. Residuals vs Attendance (Z-scoring): Residuals cluster toward the center, which possible indicate heteroscedasticity.

  3. Residuals vs Total Revenues (Z-scoring): Residuals cluster toward the center, which possible indicate heteroscedasticity.

  4. Residuals vs Season Game (centered): No trend, which indicates constant variance.

  5. Residuals vs Tenure (centered): Residual spread increases with higher tenure year, which indicates heteroscedasticity.

  6. Residuals vs Area (0/1): Boxplots for Area 0 vs 1 show similar medians, and the variance is more likely to be constant.

  7. Residuals vs Conference: Boxplots for Conference show similar medians, and the variance is more likely to be constant.

Random Slopes for tenure year

model_school_r_2 <- lmer(
s_diversion ~ game_time_num_c_2 + attendance_school_z+ game_result + s_game_c + area_classification + tenure_year_c + total_revenues_z +conference + (1+tenure_year_c|school),
data = data_clean
)
summary(model_school_r_2)
Linear mixed model fit by REML. t-tests use Satterthwaite's method [
lmerModLmerTest]
Formula: s_diversion ~ game_time_num_c_2 + attendance_school_z + game_result +  
    s_game_c + area_classification + tenure_year_c + total_revenues_z +  
    conference + (1 + tenure_year_c | school)
   Data: data_clean

REML criterion at convergence: -1684

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-4.3705 -0.5636 -0.0676  0.4748  3.9509 

Random effects:
 Groups   Name          Variance Std.Dev. Corr 
 school   (Intercept)   0.058179 0.2412        
          tenure_year_c 0.001163 0.0341   -0.20
 Residual               0.012213 0.1105        
Number of obs: 1245, groups:  school, 27

Fixed effects:
                       Estimate Std. Error         df t value Pr(>|t|)  
(Intercept)           5.225e-01  1.965e-01  2.087e+01   2.659   0.0147 *
game_time_num_c_2    -3.605e-04  1.150e-03  1.196e+03  -0.313   0.7540  
attendance_school_z   2.736e-03  3.417e-03  1.209e+03   0.801   0.4234  
game_result1          8.230e-03  7.594e-03  1.196e+03   1.084   0.2787  
s_game_c             -5.605e-04  1.637e-03  1.191e+03  -0.342   0.7321  
area_classification1 -7.620e-02  1.557e-01  2.072e+01  -0.490   0.6296  
tenure_year_c         7.593e-03  7.401e-03  2.509e+01   1.026   0.3148  
total_revenues_z      2.377e-02  9.252e-03  1.221e+03   2.569   0.0103 *
conferenceBig10      -1.402e-01  1.439e-01  2.038e+01  -0.974   0.3415  
conferenceBig12      -2.587e-01  2.064e-01  2.067e+01  -1.253   0.2241  
conferencePac12      -2.896e-03  1.775e-01  2.245e+01  -0.016   0.9871  
conferenceSEC        -2.301e-01  1.492e-01  2.049e+01  -1.543   0.1382  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Correlation of Fixed Effects:
            (Intr) g____2 attn__ gm_rs1 s_gm_c ar_cl1 tnr_y_ ttl_r_ cnfB10
gm_tm_nm__2 -0.021                                                        
attndnc_sc_ -0.010 -0.116                                                 
game_reslt1 -0.032  0.040  0.097                                          
s_game_c    -0.032  0.148 -0.046  0.169                                   
ar_clssfct1 -0.792  0.001  0.002 -0.001  0.000                            
tenure_yr_c -0.053 -0.014  0.049 -0.018  0.007 -0.011                     
totl_rvns_z  0.065 -0.004 -0.087 -0.036 -0.032 -0.018 -0.181              
confrncBg10 -0.672  0.005  0.010  0.005  0.001  0.217  0.008 -0.041       
confrncBg12 -0.350 -0.004  0.012  0.005  0.006  0.001  0.007 -0.044  0.475
confrncPc12 -0.593 -0.008  0.004  0.018  0.003  0.242 -0.043  0.013  0.603
conferncSEC -0.484 -0.008  0.005  0.002 -0.001  0.001  0.019 -0.052  0.658
            cnfB12 cnfP12
gm_tm_nm__2              
attndnc_sc_              
game_reslt1              
s_game_c                 
ar_clssfct1              
tenure_yr_c              
totl_rvns_z              
confrncBg10              
confrncBg12              
confrncPc12  0.384       
conferncSEC  0.459  0.531
performance::icc(model_school_r_2)
# Intraclass Correlation Coefficient

    Adjusted ICC: 0.891
  Unadjusted ICC: 0.824
plot(model_school_r_2, type=c("p","smooth"))

  1. There is no strong curvature. The linearity assumption is mostly reasonable.
  2. The vertical spread of points increases and decreases across the fitted values. Accordingly, the variance is not constant, which indicates heteroscedasticity.
plot(model_school_r_2,
     form = sqrt(abs(resid(.))) ~ fitted(.),
     type = c("p","smooth"))

The line is going down quickly, and the point is clustered around 0.2, which shows the variance is not constant and there is heteroscedasticity.

qqnorm(resid(model_school_r_2)); qqline(resid(model_school_r_2))

The head and tail are deviated, which shows non-normal residuals.

qqnorm(ranef(model_school_r_2)$school[, "(Intercept)"]);qqline(ranef(model_school_r_2)$school[, "(Intercept)"])

The random intercept is not roughly follows the reference line, especially two tails, so the normality assumption is not being satisfied.

mf <- model.frame(model_school_r_2)
res <- residuals(model_school_r_2)
par(mfrow=c(2,3))
plot(res ~ mf$game_time_num_c_2,
main = "Residuals vs Game Time (centered)",
xlab = "Game Time (centered)", ylab = "Residuals")
plot(res ~ mf$attendance_school_z,
main = "Residuals vs Attendance (Z-scoring)",
xlab = "Attendance (Z-scoring)", ylab = "Residuals")
plot(res ~ mf$total_revenues_z,
main = "Residuals vs Total Revenues",
xlab = "Total Revenues (Z-scoring)", ylab = "Residuals")
plot(res ~ mf$s_game_c,
main = "Residuals vs Season Game (centered)",
xlab = "Season Game (centered)", ylab = "Residuals")
plot(res ~ mf$tenure_year_c,
main = "Residuals vs Tenure (centered)",
xlab = "Tenure (centered)", ylab = "Residuals")
plot(res ~ mf$area_classification,
main = "Residuals vs Area (0/1)",
xlab = "Area (0/1)", ylab = "Residuals")

plot(res ~ mf$conference,
main = "Residuals vs Conference",
xlab = "Conference", ylab = "Residuals")

  1. Residuals vs Game Time (centered): No obvious pattern,the variance is constant, which indicates no heteroscedasticity.

  2. Residuals vs Attendance (Z-scoring): Residuals cluster toward the center, which possible indicate heteroscedasticity.

  3. Residuals vs Total Revenues (Z-scoring): Residuals cluster toward the center, which possible indicate heteroscedasticity.

  4. Residuals vs Season Game (centered): No trend, which indicates constant variance.

  5. Residuals vs Tenure (centered): Residual spread increases with higher tenure year, which indicates heteroscedasticity.

  6. Residuals vs Area (0/1): Boxplots for Area 0 vs 1 show similar medians, and the variance is more likely to be constant.

  7. Residuals vs Conference: Boxplots for Conference show similar medians, and the variance is more likely to be constant.