Stadium_Waste_Analysis_6

Author

Jingyi Yang

Load Packages

library(readxl)
library("readr")
library("tidyverse")
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ purrr     1.0.4
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.5.2     ✔ tibble    3.2.1
✔ lubridate 1.9.4     ✔ tidyr     1.3.1
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(dplyr)
library(lme4)
Loading required package: Matrix

Attaching package: 'Matrix'

The following objects are masked from 'package:tidyr':

    expand, pack, unpack
library(lmerTest)
Warning: package 'lmerTest' was built under R version 4.5.2

Attaching package: 'lmerTest'

The following object is masked from 'package:lme4':

    lmer

The following object is masked from 'package:stats':

    step
library("effectsize")
Warning: package 'effectsize' was built under R version 4.5.2

Import the Data

Clean the data

# Keep only the variables used in this analysis.
data_clean <- data %>%
  select(all_of(c(
    "Conference", "School", "Area Classification (0-Rural; 1-Urban)",
    "Year", "Tenure Year", "In-Season_Game", "S_Diversion", "Attendance",
    "Game Time", "Game result (Win=1; Loss=0)", "Athletic Dept Profit",
    "Athletic Dept Total Expenses", "Athletic Dept Total Revenues"
  )))

# Derive the kick-off time columns and coerce variables to suitable types.
# mutate() evaluates its arguments in order, so GameTime_numeric is computed
# from the original `Game Time` values before that column is overwritten.
data_clean <- data_clean %>%
  mutate(
    # Kick-off time as decimal hours (hour + minutes / 60).
    GameTime_numeric = as.numeric(format(`Game Time`, "%H")) +
      as.numeric(format(`Game Time`, "%M")) / 60,
    # Keep only the "HH:MM" text so the value carries no date component.
    `Game Time` = as.character(format(`Game Time`, format = "%H:%M")),
    `Area Classification (0-Rural; 1-Urban)` =
      as.character(`Area Classification (0-Rural; 1-Urban)`),
    # Entries that cannot be parsed as numbers become NA (with a warning).
    Attendance = as.numeric(Attendance)
  )
Warning: There was 1 warning in `mutate()`.
ℹ In argument: `Attendance = as.numeric(Attendance)`.
Caused by warning:
! NAs introduced by coercion
# Names of all character columns in data_clean; printed for inspection.
# select(where(...)) replaces the superseded select_if().
cols_to_factor <- data_clean %>%
  select(where(is.character)) %>%
  colnames()
cols_to_factor
[1] "Conference"                            
[2] "School"                                
[3] "Area Classification (0-Rural; 1-Urban)"
[4] "Game Time"                             
[5] "Game result (Win=1; Loss=0)"           
 # Recode the literal "N/A" game result as a true missing value first, so the
 # factor conversion below does not create an "N/A" level.
 data_clean <- mutate(
   data_clean,
   `Game result (Win=1; Loss=0)` = na_if(`Game result (Win=1; Loss=0)`, "N/A")
 )
 # Convert every column listed in cols_to_factor to a factor.
 data_clean <- mutate(data_clean, across(all_of(cols_to_factor), as.factor))
 # Drop games without a recorded result (cancelled games).
 data_clean <- filter(data_clean, !is.na(`Game result (Win=1; Loss=0)`))
# Bin the decimal kick-off hour into four time slots:
#   1 = morning   [9, 12)
#   2 = noon      [12, 15.5)
#   3 = afternoon [15.5, 19)
#   4 = evening   [19, ...)
# Times before 9 (or missing times) match no slot and stay NA.
# GameTime_numeric_c_2 is the kick-off hour centred at 12:00.
data_clean <- data_clean %>%
  mutate(
    GameTime_numeric_c_1 = case_when(
      GameTime_numeric >= 9 & GameTime_numeric < 12 ~ 1,
      GameTime_numeric >= 12 & GameTime_numeric < 15.5 ~ 2,
      GameTime_numeric >= 15.5 & GameTime_numeric < 19 ~ 3,
      GameTime_numeric >= 19 ~ 4
    ),
    GameTime_numeric_c_2 = GameTime_numeric - 12
  )
# Shift the in-season game number so the first game is 0 (games 1-9 -> 0-8).
# Any value outside 1-9, including NA, becomes NA.
data_clean <- data_clean %>%
  mutate(
    `In-Season_Game_Centered` = ifelse(
      `In-Season_Game` %in% 1:9,
      `In-Season_Game` - 1,
      NA
    )
  )
# Shift the tenure year so the first year is 0 (years 1-20 -> 0-19).
# Any value outside 1-20, including NA, becomes NA.
data_clean <- data_clean %>%
  mutate(
    `Tenure Year Centered` = ifelse(
      `Tenure Year` %in% 1:20,
      `Tenure Year` - 1,
      NA
    )
  )
# Re-express the calendar year as years since 2003 (2003-2024 -> 0-21).
# Any value outside 2003-2024, including NA, becomes NA.
data_clean <- data_clean %>%
  mutate(
    Year_Centered = ifelse(
      `Year` %in% 2003:2024,
      `Year` - 2003,
      NA
    )
  )
# Drop the helper column GameTime_numeric, switch to short snake_case column
# names (new_name = "old name"), and store the time-slot code as a factor.
data_clean <- data_clean %>%
  select(-GameTime_numeric) %>%
  dplyr::rename(all_of(c(
    conference = "Conference",
    school = "School",
    area_classification = "Area Classification (0-Rural; 1-Urban)",
    year = "Year",
    tenure_year = "Tenure Year",
    s_game = "In-Season_Game",
    s_diversion = "S_Diversion",
    attendance = "Attendance",
    game_time = "Game Time",
    game_result = "Game result (Win=1; Loss=0)",
    profit = "Athletic Dept Profit",
    total_expenses = "Athletic Dept Total Expenses",
    total_revenues = "Athletic Dept Total Revenues",
    game_time_chars_c_1 = "GameTime_numeric_c_1",
    game_time_num_c_2 = "GameTime_numeric_c_2",
    s_game_c = "In-Season_Game_Centered",
    tenure_year_c = "Tenure Year Centered",
    year_c = "Year_Centered"
  ))) %>%
  mutate(game_time_chars_c_1 = as.factor(game_time_chars_c_1))
# 
# Centre the continuous predictors.
#   attendance_cwc_school: attendance minus the mean attendance of its school
#   attendance_cwc_year:   attendance minus the mean attendance of its year
#   total_revenues_cgm:    total revenues minus the overall (grand) mean
# The means ignore missing values (na.rm = TRUE); rows with a missing value
# still get NA for the centred variable.
#
# Explicit assignment with %>% is used instead of the compound pipe %<>%,
# because %<>% comes from magrittr, which library(tidyverse) does not attach.
data_clean <- data_clean %>%
  group_by(school) %>%
  mutate(
    attendance_mean_school = mean(attendance, na.rm = TRUE),
    attendance_cwc_school = attendance - attendance_mean_school
  ) %>%
  ungroup() %>%
  group_by(year) %>%
  mutate(
    attendance_mean_year = mean(attendance, na.rm = TRUE),
    attendance_cwc_year = attendance - attendance_mean_year
  ) %>%
  ungroup() %>%
  mutate(
    total_revenues_mean = mean(total_revenues, na.rm = TRUE),
    total_revenues_cgm = total_revenues - total_revenues_mean
  )
str(data_clean)
tibble [1,390 × 24] (S3: tbl_df/tbl/data.frame)
 $ conference            : Factor w/ 5 levels "ACC","Big10",..: 4 4 4 4 4 4 4 4 4 4 ...
 $ school                : Factor w/ 31 levels "Arizona State",..: 1 1 1 1 1 1 1 1 1 1 ...
 $ area_classification   : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
 $ year                  : num [1:1390] 2015 2015 2015 2015 2015 ...
 $ tenure_year           : num [1:1390] 1 1 1 1 1 1 1 2 2 2 ...
 $ s_game                : num [1:1390] 1 2 3 4 5 6 7 1 2 3 ...
 $ s_diversion           : num [1:1390] 0.44 0.412 0.315 0.57 0.579 ...
 $ attendance            : num [1:1390] 46500 43310 61904 44157 56534 ...
 $ game_time             : Factor w/ 54 levels "09:00","10:00",..: 49 44 46 44 46 13 15 49 44 44 ...
 $ game_result           : Factor w/ 2 levels "0","1": 2 2 1 2 1 2 2 2 2 2 ...
 $ profit                : num [1:1390] 566524 566524 566524 566524 566524 ...
 $ total_expenses        : num [1:1390] 83873516 83873516 83873516 83873516 83873516 ...
 $ total_revenues        : num [1:1390] 84440040 84440040 84440040 84440040 84440040 ...
 $ game_time_chars_c_1   : Factor w/ 4 levels "1","2","3","4": 4 4 4 4 4 2 2 4 4 4 ...
 $ game_time_num_c_2     : num [1:1390] 8 7 7.5 7 7.5 1 1.5 8 7 7 ...
 $ s_game_c              : num [1:1390] 0 1 2 3 4 5 6 0 1 2 ...
 $ tenure_year_c         : num [1:1390] 0 0 0 0 0 0 0 1 1 1 ...
 $ year_c                : num [1:1390] 12 12 12 12 12 12 12 13 13 13 ...
 $ attendance_mean_school: num [1:1390] 50009 50009 50009 50009 50009 ...
 $ attendance_cwc_school : num [1:1390] -3509 -6699 11895 -5852 6525 ...
 $ attendance_mean_year  : num [1:1390] 70003 70003 70003 70003 70003 ...
 $ attendance_cwc_year   : num [1:1390] -23503 -26693 -8099 -25846 -13469 ...
 $ total_revenues_mean   : num [1:1390] 1.25e+08 1.25e+08 1.25e+08 1.25e+08 1.25e+08 ...
 $ total_revenues_cgm    : num [1:1390] -40621158 -40621158 -40621158 -40621158 -40621158 ...

Two level Model

# Standardize the centred predictors with datawizard::standardize().
data_clean[["attendance_cwc_year_z"]] <-
  datawizard::standardize(data_clean[["attendance_cwc_year"]])
data_clean[["attendance_cwc_school_z"]] <-
  datawizard::standardize(data_clean[["attendance_cwc_school"]])
data_clean[["total_revenues_cgm_z"]] <-
  datawizard::standardize(data_clean[["total_revenues_cgm"]])

# Two-level model with a random intercept per school; game time enters as the
# continuous, noon-centred variable.
model_school <- lmer(
  s_diversion ~
    game_time_num_c_2 +
    attendance_cwc_school_z +
    game_result +
    s_game_c +
    area_classification +
    tenure_year_c +
    total_revenues_cgm_z +
    (1 | school),
  data = data_clean
)
summary(model_school)
Linear mixed model fit by REML. t-tests use Satterthwaite's method [
lmerModLmerTest]
Formula: 
s_diversion ~ game_time_num_c_2 + attendance_cwc_school_z + game_result +  
    s_game_c + area_classification + tenure_year_c + total_revenues_cgm_z +  
    (1 | school)
   Data: data_clean

REML criterion at convergence: -1299.8

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-3.6647 -0.6181 -0.0455  0.5359  4.0526 

Random effects:
 Groups   Name        Variance Std.Dev.
 school   (Intercept) 0.06038  0.2457  
 Residual             0.01773  0.1332  
Number of obs: 1245, groups:  school, 27

Fixed effects:
                          Estimate Std. Error         df t value Pr(>|t|)    
(Intercept)              4.526e-01  1.432e-01  2.549e+01   3.161  0.00402 ** 
game_time_num_c_2       -1.036e-03  1.370e-03  1.215e+03  -0.756  0.44960    
attendance_cwc_school_z -2.446e-03  3.773e-03  1.214e+03  -0.648  0.51694    
game_result1             1.256e-03  9.010e-03  1.214e+03   0.139  0.88917    
s_game_c                -4.248e-04  1.971e-03  1.213e+03  -0.216  0.82938    
area_classification1    -1.644e-01  1.512e-01  2.506e+01  -1.087  0.28723    
tenure_year_c            1.385e-02  1.838e-03  1.233e+03   7.535 9.43e-14 ***
total_revenues_cgm_z     5.761e-02  1.002e-02  1.230e+03   5.751 1.12e-08 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Correlation of Fixed Effects:
            (Intr) g____2 att___ gm_rs1 s_gm_c ar_cl1 tnr_y_
gm_tm_nm__2 -0.036                                          
attndnc_c__ -0.009 -0.090                                   
game_reslt1 -0.042  0.038  0.046                            
s_game_c    -0.050  0.147 -0.070  0.167                     
ar_clssfct1 -0.939 -0.001  0.003 -0.009 -0.002              
tenure_yr_c -0.068 -0.043  0.182 -0.002  0.016  0.017       
ttl_rvns_c_  0.075 -0.010 -0.144 -0.057 -0.038 -0.030 -0.783
performance::icc(model_school)
# Intraclass Correlation Coefficient

    Adjusted ICC: 0.773
  Unadjusted ICC: 0.679
plot(model_school, type=c("p","smooth"))

  1. The smoothed line stays close to zero.
  2. The homoscedasticity (constant-variance) assumption might be violated: the spread of residuals is not constant across the range of fitted values (for fitted values < 0.2, the variance is small).
plot(model_school,
     form = sqrt(abs(resid(.))) ~ fitted(.),
     type = c("p","smooth"))

The line is not flat, which shows a non-constant variance pattern.

qqnorm(resid(model_school)); qqline(resid(model_school))

  1. The points almost fit on the reference line. (normally distributed)
hist(resid(model_school))

  1. a single peak (unimodal), roughly symmetrical shape,frequencies tapering off on both sides,most values concentrated around 0. (normally distributed)
qqnorm(ranef(model_school)$school[, "(Intercept)"]);qqline(ranef(model_school)$school[, "(Intercept)"])

  1. Mostly linear between -1 to 1, the majority of school random intercepts follow a normal distribution.
hist(ranef(model_school)$school[, "(Intercept)"])

  1. Distribution is roughly centered around zero, but slightly skewed. (Normality assumption is broadly acceptable)
# Two-level model with a random intercept per school; game time enters as the
# four-level time-slot factor instead of the continuous variable.
model_school_cat <- lmer(
  s_diversion ~
    game_time_chars_c_1 +
    attendance_cwc_school_z +
    game_result +
    s_game_c +
    area_classification +
    tenure_year_c +
    total_revenues_cgm_z +
    (1 | school),
  data = data_clean
)
summary(model_school_cat)
Linear mixed model fit by REML. t-tests use Satterthwaite's method [
lmerModLmerTest]
Formula: s_diversion ~ game_time_chars_c_1 + attendance_cwc_school_z +  
    game_result + s_game_c + area_classification + tenure_year_c +  
    total_revenues_cgm_z + (1 | school)
   Data: data_clean

REML criterion at convergence: -1293.8

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-3.6546 -0.6153 -0.0385  0.5464  4.1266 

Random effects:
 Groups   Name        Variance Std.Dev.
 school   (Intercept) 0.06029  0.2455  
 Residual             0.01771  0.1331  
Number of obs: 1245, groups:  school, 27

Fixed effects:
                          Estimate Std. Error         df t value Pr(>|t|)    
(Intercept)              4.714e-01  1.442e-01  2.635e+01   3.268  0.00301 ** 
game_time_chars_c_12    -1.974e-02  1.966e-02  1.216e+03  -1.004  0.31578    
game_time_chars_c_13    -3.331e-02  2.018e-02  1.217e+03  -1.651  0.09903 .  
game_time_chars_c_14    -1.890e-02  2.091e-02  1.217e+03  -0.904  0.36615    
attendance_cwc_school_z -2.356e-03  3.770e-03  1.212e+03  -0.625  0.53214    
game_result1             2.257e-03  9.026e-03  1.212e+03   0.250  0.80254    
s_game_c                -3.513e-04  1.967e-03  1.211e+03  -0.179  0.85829    
area_classification1    -1.652e-01  1.511e-01  2.507e+01  -1.093  0.28473    
tenure_year_c            1.373e-02  1.839e-03  1.231e+03   7.466 1.56e-13 ***
total_revenues_cgm_z     5.775e-02  1.001e-02  1.229e+03   5.767 1.02e-08 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Correlation of Fixed Effects:
            (Intr) g____12 g____13 g____14 att___ gm_rs1 s_gm_c ar_cl1 tnr_y_
gm_tm_c__12 -0.128                                                           
gm_tm_c__13 -0.126  0.893                                                    
gm_tm_c__14 -0.127  0.864   0.859                                            
attndnc_c__ -0.005 -0.028  -0.052  -0.063                                    
game_reslt1 -0.037 -0.036  -0.033   0.002   0.046                            
s_game_c    -0.049 -0.006   0.034   0.061  -0.068  0.169                     
ar_clssfct1 -0.933  0.012   0.010   0.012   0.002 -0.009 -0.001              
tenure_yr_c -0.065 -0.019  -0.013  -0.041   0.181 -0.004  0.017  0.016       
ttl_rvns_c_  0.074  0.011   0.000   0.001  -0.143 -0.057 -0.040 -0.030 -0.782
performance::icc(model_school_cat)
# Intraclass Correlation Coefficient

    Adjusted ICC: 0.773
  Unadjusted ICC: 0.679
plot(model_school_cat, type=c("p","smooth"))

  1. Mild linearity, a large difference in residual spread across fitted values(heteroscedasticity)
plot(model_school_cat,
     form = sqrt(abs(resid(.))) ~ fitted(.),
     type = c("p","smooth"))

  1. The smoothed line is not horizontal (heteroscedasticity)
qqnorm(resid(model_school_cat)); qqline(resid(model_school_cat))

  1. The points almost fit on the reference line. (normally distributed)
hist(resid(model_school_cat))

  1. The histogram shows a single, central peak. The distribution is roughly bell-shaped. (normally distributed)
qqnorm(ranef(model_school_cat)$school[, "(Intercept)"]);qqline(ranef(model_school_cat)$school[, "(Intercept)"])

  1. close to linear, roughly follow a normal distribution.
hist(ranef(model_school_cat)$school[, "(Intercept)"])

  1. roughly bell-shaped distribution, but a little bit skewed. (approximate normality with mild skew)
# Two-level model with a random intercept per year; game time enters as the
# continuous, noon-centred variable and attendance is centred within year.
# NOTE(review): unlike model_school, model_school_cat and model_year_cat, this
# formula does not include tenure_year_c -- confirm that this is intentional.
model_year <- lmer(
  s_diversion ~
    game_time_num_c_2 +
    attendance_cwc_year_z +
    game_result +
    s_game_c +
    area_classification +
    total_revenues_cgm_z +
    (1 | year),
  data = data_clean
)
summary(model_year)
Linear mixed model fit by REML. t-tests use Satterthwaite's method [
lmerModLmerTest]
Formula: 
s_diversion ~ game_time_num_c_2 + attendance_cwc_year_z + game_result +  
    s_game_c + area_classification + total_revenues_cgm_z + (1 |      year)
   Data: data_clean

REML criterion at convergence: 389.8

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-2.0293 -0.7457 -0.3116  0.8336  2.2836 

Random effects:
 Groups   Name        Variance  Std.Dev.
 year     (Intercept) 0.0003515 0.01875 
 Residual             0.0768552 0.27723 
Number of obs: 1245, groups:  year, 20

Fixed effects:
                        Estimate Std. Error         df t value Pr(>|t|)    
(Intercept)            4.457e-01  3.393e-02  2.265e+02  13.136  < 2e-16 ***
game_time_num_c_2     -6.502e-03  2.637e-03  1.236e+03  -2.466  0.01381 *  
attendance_cwc_year_z -2.996e-02  1.035e-02  2.185e+02  -2.895  0.00417 ** 
game_result1           3.258e-03  1.817e-02  1.238e+03   0.179  0.85774    
s_game_c              -2.251e-03  4.057e-03  1.227e+03  -0.555  0.57913    
area_classification1  -2.588e-02  2.890e-02  1.054e+03  -0.895  0.37084    
total_revenues_cgm_z   1.022e-01  1.025e-02  3.325e+01   9.967 1.62e-11 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Correlation of Fixed Effects:
            (Intr) g____2 att___ gm_rs1 s_gm_c ar_cl1
gm_tm_nm__2 -0.283                                   
attndnc_c__  0.222  0.014                            
game_reslt1 -0.370  0.025 -0.092                     
s_game_c    -0.431  0.130 -0.060  0.158              
ar_clssfct1 -0.732 -0.022 -0.246 -0.090 -0.007       
ttl_rvns_c_  0.093 -0.041 -0.550 -0.104 -0.023 -0.030
performance::icc(model_year)
# Intraclass Correlation Coefficient

    Adjusted ICC: 0.005
  Unadjusted ICC: 0.004
plot(model_year, type=c("p","smooth"))

  1. The smooth line rises gradually as fitted values increase. (nonlinearity)
  2. Residual variance changes as fitted values increase. (heteroscedasticity)
plot(model_year,
     form = sqrt(abs(resid(.))) ~ fitted(.),
     type = c("p","smooth"))

  1. The smooth curve is NOT flat.
  2. The points cluster around 0.4.
qqnorm(resid(model_year)); qqline(resid(model_year))

  1. The s-shape curve (indicates skewness in the residuals)
hist(resid(model_year))

  1. Skewed (The bars are taller and more concentrated around -0.2)
qqnorm(ranef(model_year)$year[, "(Intercept)"]);qqline(ranef(model_year)$year[, "(Intercept)"])

  1. Points lie very close to the line (very small deviations)- Level-2 random intercepts (year effects) are approximately normally distributed.
hist(ranef(model_year)$year[, "(Intercept)"])

  1. No extreme skewness
  2. Slight multi-modality
# Two-level model with a random intercept per year; game time enters as the
# four-level time-slot factor and attendance is centred within year.
model_year_cat <- lmer(
  s_diversion ~
    game_time_chars_c_1 +
    attendance_cwc_year_z +
    game_result +
    s_game_c +
    area_classification +
    tenure_year_c +
    total_revenues_cgm_z +
    (1 | year),
  data = data_clean
)
summary(model_year_cat)
Linear mixed model fit by REML. t-tests use Satterthwaite's method [
lmerModLmerTest]
Formula: 
s_diversion ~ game_time_chars_c_1 + attendance_cwc_year_z + game_result +  
    s_game_c + area_classification + tenure_year_c + total_revenues_cgm_z +  
    (1 | year)
   Data: data_clean

REML criterion at convergence: 361.9

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-1.8337 -0.7852 -0.2846  0.7837  2.4683 

Random effects:
 Groups   Name        Variance  Std.Dev.
 year     (Intercept) 0.0001522 0.01234 
 Residual             0.0743757 0.27272 
Number of obs: 1245, groups:  year, 20

Fixed effects:
                        Estimate Std. Error         df t value Pr(>|t|)    
(Intercept)            3.913e-01  4.708e-02  4.925e+02   8.311 9.30e-16 ***
game_time_chars_c_12   6.882e-03  3.586e-02  1.221e+03   0.192  0.84784    
game_time_chars_c_13  -4.074e-02  3.642e-02  1.217e+03  -1.119  0.26351    
game_time_chars_c_14  -2.393e-02  3.736e-02  1.228e+03  -0.640  0.52197    
attendance_cwc_year_z -2.623e-02  1.010e-02  4.114e+02  -2.598  0.00971 ** 
game_result1           1.304e-03  1.788e-02  1.234e+03   0.073  0.94186    
s_game_c              -1.828e-03  3.986e-03  1.231e+03  -0.459  0.64663    
area_classification1  -4.424e-02  2.872e-02  1.075e+03  -1.541  0.12368    
tenure_year_c          1.240e-02  1.850e-03  1.090e+03   6.705 3.23e-11 ***
total_revenues_cgm_z   7.686e-02  1.049e-02  1.140e+02   7.329 3.59e-11 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Correlation of Fixed Effects:
            (Intr) g____12 g____13 g____14 att___ gm_rs1 s_gm_c ar_cl1 tnr_y_
gm_tm_c__12 -0.695                                                           
gm_tm_c__13 -0.678  0.867                                                    
gm_tm_c__14 -0.688  0.846   0.831                                            
attndnc_c__  0.193 -0.050  -0.068  -0.045                                    
game_reslt1 -0.243 -0.025  -0.020   0.003  -0.092                            
s_game_c    -0.287 -0.025   0.017   0.039  -0.063  0.159                     
ar_clssfct1 -0.579  0.120   0.086   0.111  -0.248 -0.090 -0.009              
tenure_yr_c -0.047 -0.146  -0.130  -0.129   0.040 -0.012  0.010 -0.113       
ttl_rvns_c_  0.070  0.069   0.058   0.032  -0.514 -0.098 -0.028  0.010 -0.358
performance::icc(model_year_cat)
# Intraclass Correlation Coefficient

    Adjusted ICC: 0.002
  Unadjusted ICC: 0.002
plot(model_year_cat, type=c("p","smooth"))

  1. Nonlinearity (Strong heteroscedasticity)
plot(model_year_cat,
     form = sqrt(abs(resid(.))) ~ fitted(.),
     type = c("p","smooth"))

  1. The smoothed line has a strong curve. (heteroscedasticity)
qqnorm(resid(model_year_cat)); qqline(resid(model_year_cat))

  1. Strong S-shape pattern (residuals are not normally distributed)
hist(resid(model_year_cat))

  1. The residual distribution is skewed and does not fit a normal distribution.
qqnorm(ranef(model_year_cat)$year[, "(Intercept)"]);qqline(ranef(model_year_cat)$year[, "(Intercept)"])

  1. Close to the diagonal line (Level-2 random intercepts (year effects) are approximately normally distributed)
hist(ranef(model_year_cat)$year[, "(Intercept)"])

  1. A single peak, roughly symmetrical shape, no extreme outliers, no bimodality(approximately normally distributed)