Stadium_Waste_Analysis_4

Author

Jingyi Yang

Install Packages

library(readxl)
library("readr")
library("tidyverse")
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ purrr     1.0.4
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.5.2     ✔ tibble    3.2.1
✔ lubridate 1.9.4     ✔ tidyr     1.3.1
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(dplyr)
library(lme4)
Loading required package: Matrix

Attaching package: 'Matrix'

The following objects are masked from 'package:tidyr':

    expand, pack, unpack
library(lmerTest)
Warning: package 'lmerTest' was built under R version 4.5.2

Attaching package: 'lmerTest'

The following object is masked from 'package:lme4':

    lmer

The following object is masked from 'package:stats':

    step

Import the Data

Clean the data

data_clean <- data %>% select(`Conference`, `School`, `Area Classification (0-Rural; 1-Urban)`, `Year`, `Tenure Year`, `In-Season_Game`, `S_Diversion`, `Attendance`, `Game Time`,`Game result (Win=1; Loss=0)`,`Athletic Dept Profit`, `Athletic Dept Total Expenses`, `Athletic Dept Total Revenues`) # select the column
data_clean$GameTime_numeric <- as.numeric(format(data_clean$`Game Time`, "%H")) + as.numeric(format(data_clean$`Game Time`, "%M"))/60 # convert game time to numerical variable and create a new column
data_clean$`Game Time`=format(data_clean$`Game Time`, format = "%H:%M") # avoid game time impacted by computer system date
data_clean <- data_clean %>% mutate(`Game Time`= as.character(`Game Time`)) %>% mutate(`Area Classification (0-Rural; 1-Urban)`= as.character(`Area Classification (0-Rural; 1-Urban)`)) %>% mutate(`Attendance`= as.numeric(`Attendance`)) # Convert the variable to its suitable data type. 
Warning: There was 1 warning in `mutate()`.
ℹ In argument: `Attendance = as.numeric(Attendance)`.
Caused by warning:
! NAs introduced by coercion
cols_to_factor <- data_clean%>% select_if(is.character) %>% colnames() 
cols_to_factor
[1] "Conference"                            
[2] "School"                                
[3] "Area Classification (0-Rural; 1-Urban)"
[4] "Game Time"                             
[5] "Game result (Win=1; Loss=0)"           
 data_clean <- data_clean %>% 
  mutate(`Game result (Win=1; Loss=0)` = na_if(`Game result (Win=1; Loss=0)`, "N/A")) %>%
          mutate(across(all_of(cols_to_factor), as.factor)) # Make sure there is no NA level.
data_clean <- subset(data_clean, !is.na(`Game result (Win=1; Loss=0)`)) # Clean the game that is being cancelled.
data_clean$GameTime_numeric_c_1 <- with(data_clean, ifelse(
  GameTime_numeric >= 9 &  GameTime_numeric < 12, 1,  # Morning
  ifelse(GameTime_numeric >= 12 & GameTime_numeric < 15.5, 2,   # noon
  ifelse(GameTime_numeric >= 15.5 & GameTime_numeric < 19, 3,   # afternoon
  ifelse(GameTime_numeric >= 19,  4,   # evening
  NA)))                                
)) # Classify game time into four time slots.
data_clean$GameTime_numeric_c_2 <-  data_clean$GameTime_numeric-12 # centralize game time by 12
data_clean$`In-Season_Game_Centered` <- with(data_clean,
  ifelse(`In-Season_Game` == 1, 0,
  ifelse(`In-Season_Game` == 2, 1,
  ifelse(`In-Season_Game` == 3, 2,
  ifelse(`In-Season_Game` == 4, 3,
  ifelse(`In-Season_Game` == 5, 4,
  ifelse(`In-Season_Game` == 6, 5,
  ifelse(`In-Season_Game` == 7, 6,
  ifelse(`In-Season_Game` == 8, 7,
  ifelse(`In-Season_Game` == 9, 8, NA)))))))))
) # Centralize in the in-season game to 0.
data_clean$`Tenure Year Centered` <- with(data_clean,
  ifelse(`Tenure Year` == 1, 0,
  ifelse(`Tenure Year` == 2, 1,
  ifelse(`Tenure Year` == 3, 2,
  ifelse(`Tenure Year` == 4, 3,
  ifelse(`Tenure Year` == 5, 4,
  ifelse(`Tenure Year` == 6, 5,
  ifelse(`Tenure Year` == 7, 6,
  ifelse(`Tenure Year` == 8, 7,
  ifelse(`Tenure Year` == 9, 8,
  ifelse(`Tenure Year` == 10, 9,
  ifelse(`Tenure Year` == 11, 10,
  ifelse(`Tenure Year` == 12, 11,
  ifelse(`Tenure Year` == 13, 12,
  ifelse(`Tenure Year` == 14, 13,
  ifelse(`Tenure Year` == 15, 14,
  ifelse(`Tenure Year` == 16, 15,
  ifelse(`Tenure Year` == 17, 16,
  ifelse(`Tenure Year` == 18, 17,
  ifelse(`Tenure Year` == 19, 18,
  ifelse(`Tenure Year` == 20, 19,
         NA))))))))))))))))))))
)  # Centralize the tenure year to 0.
data_clean$Year_Centered <- with(data_clean,
  ifelse(`Year` == 2003, 0,
  ifelse(`Year` == 2004, 1,
  ifelse(`Year` == 2005, 2,
  ifelse(`Year` == 2006, 3,
  ifelse(`Year` == 2007, 4,
  ifelse(`Year` == 2008, 5,
  ifelse(`Year` == 2009, 6,
  ifelse(`Year` == 2010, 7,
  ifelse(`Year` == 2011, 8,
  ifelse(`Year` == 2012, 9,
  ifelse(`Year` == 2013, 10,
  ifelse(`Year` == 2014, 11,
  ifelse(`Year` == 2015, 12,
  ifelse(`Year` == 2016, 13,
  ifelse(`Year` == 2017, 14,
  ifelse(`Year` == 2018, 15,
  ifelse(`Year` == 2019, 16,
  ifelse(`Year` == 2020, 17,
  ifelse(`Year` == 2021, 18,
  ifelse(`Year` == 2022, 19,
  ifelse(`Year` == 2023, 20,
  ifelse(`Year` == 2024, 21,
         NA))))))))))))))))))))))
) # centralize year to 0.
data_clean <- data_clean %>% dplyr::rename(`conference`= `Conference`,
                                    `school`= `School`,
                                    `area_classification` = `Area Classification (0-Rural; 1-Urban)`,
                                    `year`= `Year`,
                                    `tenure_year` = `Tenure Year`,
                                    `s_game`= `In-Season_Game`,
                                    `s_diversion`= `S_Diversion`,
                                    `attendance`= `Attendance`,
                                    `game_time`= `Game Time`,
                                    `game_result`= `Game result (Win=1; Loss=0)`,
                                    `profit`= `Athletic Dept Profit`,
                                    `total_expenses`= `Athletic Dept Total Expenses`,
                                    `total_revenues`= `Athletic Dept Total Revenues`,
                                    `game_time_chars_c_1`= `GameTime_numeric_c_1`,
                                    `game_time_num_c_2`= `GameTime_numeric_c_2`,
                                    `s_game_c`= `In-Season_Game_Centered`,
                                    `tenure_year_c`= `Tenure Year Centered`,
                                    `year_c`= `Year_Centered`
                                    ) %>% select(- `GameTime_numeric`) # rename the columns.

data_clean$game_time_chars_c_1 <-as.factor(data_clean$game_time_chars_c_1) # Change the variable into a factor variable.
# 
data_clean %<>%
  group_by(school) %>%
  mutate(attendance_mean_school = mean(attendance, na.rm = TRUE)) %>%
  mutate(attendance_cwc_school = attendance - attendance_mean_school) %>%
  ungroup()
data_clean %<>%
  group_by(year) %>%
  mutate(attendance_mean_year = mean(attendance, na.rm = TRUE)) %>%
  mutate(attendance_cwc_year = attendance - attendance_mean_year) %>%
  ungroup()
data_clean %<>%
mutate(total_revenues_mean = mean(total_revenues, na.rm = TRUE)) %>%
mutate(total_revenues_cgm = total_revenues - total_revenues_mean)
str(data_clean)
tibble [1,390 × 24] (S3: tbl_df/tbl/data.frame)
 $ conference            : Factor w/ 5 levels "ACC","Big10",..: 4 4 4 4 4 4 4 4 4 4 ...
 $ school                : Factor w/ 31 levels "Arizona State",..: 1 1 1 1 1 1 1 1 1 1 ...
 $ area_classification   : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
 $ year                  : num [1:1390] 2015 2015 2015 2015 2015 ...
 $ tenure_year           : num [1:1390] 1 1 1 1 1 1 1 2 2 2 ...
 $ s_game                : num [1:1390] 1 2 3 4 5 6 7 1 2 3 ...
 $ s_diversion           : num [1:1390] 0.44 0.412 0.315 0.57 0.579 ...
 $ attendance            : num [1:1390] 46500 43310 61904 44157 56534 ...
 $ game_time             : Factor w/ 54 levels "09:00","10:00",..: 49 44 46 44 46 13 15 49 44 44 ...
 $ game_result           : Factor w/ 2 levels "0","1": 2 2 1 2 1 2 2 2 2 2 ...
 $ profit                : num [1:1390] 566524 566524 566524 566524 566524 ...
 $ total_expenses        : num [1:1390] 83873516 83873516 83873516 83873516 83873516 ...
 $ total_revenues        : num [1:1390] 84440040 84440040 84440040 84440040 84440040 ...
 $ game_time_chars_c_1   : Factor w/ 4 levels "1","2","3","4": 4 4 4 4 4 2 2 4 4 4 ...
 $ game_time_num_c_2     : num [1:1390] 8 7 7.5 7 7.5 1 1.5 8 7 7 ...
 $ s_game_c              : num [1:1390] 0 1 2 3 4 5 6 0 1 2 ...
 $ tenure_year_c         : num [1:1390] 0 0 0 0 0 0 0 1 1 1 ...
 $ year_c                : num [1:1390] 12 12 12 12 12 12 12 13 13 13 ...
 $ attendance_mean_school: num [1:1390] 50009 50009 50009 50009 50009 ...
 $ attendance_cwc_school : num [1:1390] -3509 -6699 11895 -5852 6525 ...
 $ attendance_mean_year  : num [1:1390] 70003 70003 70003 70003 70003 ...
 $ attendance_cwc_year   : num [1:1390] -23503 -26693 -8099 -25846 -13469 ...
 $ total_revenues_mean   : num [1:1390] 1.25e+08 1.25e+08 1.25e+08 1.25e+08 1.25e+08 ...
 $ total_revenues_cgm    : num [1:1390] -40621158 -40621158 -40621158 -40621158 -40621158 ...

Analysis & Test

LM

data_clean %>% ggplot(mapping = aes(x = game_time_chars_c_1, y = s_diversion, colour = factor(conference))) +
  geom_point() +
  geom_smooth(mapping = aes(group = conference), method = "lm", se = FALSE, fullrange = TRUE) +
  labs(title = "s_diversion vs. game time (level by conference)",
       colour = "Conference") +
  theme_classic() +
  theme(plot.title = element_text(hjust = 0.5, face = "bold"))
`geom_smooth()` using formula = 'y ~ x'

data_clean %>% ggplot(mapping = aes(x = game_time_num_c_2, y = s_diversion, colour = factor(conference))) +
  geom_point() +
  geom_smooth(mapping = aes(group = conference), method = "lm", se = FALSE, fullrange = TRUE) +
  labs(title = "s_diversion vs. game time (level by conference)",
       colour = "Conference") +
  theme_classic() +
  theme(plot.title = element_text(hjust = 0.5, face = "bold"))
`geom_smooth()` using formula = 'y ~ x'

Boxplot

data_clean  %>% 
  ggplot(aes(x = game_time_chars_c_1, y = s_diversion, fill=school)) +
  geom_point(size=0.5) +
  geom_boxplot() +
  theme_classic()+
  theme(legend.position = "none")

data_clean  %>% 
  ggplot(aes(x = game_time_chars_c_1, y = s_diversion)) +
  geom_point(size=0.5) +
  geom_boxplot() +
  theme_classic() +
  theme(legend.position="none")

data_clean  %>% 
  ggplot(aes(x = game_time_chars_c_1, y = s_diversion, fill=conference)) +
  geom_point(size=0.5) +
  geom_boxplot() +
  theme_classic()+
  theme()

Loess

data_clean %>% ggplot(mapping = aes(x = game_time_chars_c_1, y = s_diversion, colour = factor(conference))) +
  geom_point() +
  geom_smooth(mapping = aes(group = conference), method = "loess", se = FALSE, fullrange = TRUE) +
  labs(title = "s_diversion vs. game time (level by conference)",
       colour = "Conference") +
  theme_classic()+
  theme(plot.title = element_text(hjust = 0.5, face = "bold"))
`geom_smooth()` using formula = 'y ~ x'
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: pseudoinverse used at 0.985
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: neighborhood radius 2.015
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: reciprocal condition number 8.6211e-16
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: There are other near singularities as well. 4.0602
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: pseudoinverse used at 0.985
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: neighborhood radius 2.015
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: reciprocal condition number 9.2717e-16
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: There are other near singularities as well. 4.0602
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: pseudoinverse used at 4.015
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: neighborhood radius 2.015
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: reciprocal condition number 3.3032e-16
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: There are other near singularities as well. 1
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: pseudoinverse used at 4.015
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: neighborhood radius 2.015
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: reciprocal condition number 6.3755e-16
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: There are other near singularities as well. 1
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: pseudoinverse used at 4.015
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: neighborhood radius 2.015
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: reciprocal condition number 7.9026e-16
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: There are other near singularities as well. 1

data_clean %>% ggplot(mapping = aes(x = game_time_num_c_2, y = s_diversion, colour = factor(conference))) +
  geom_point() +
  geom_smooth(mapping = aes(group = conference), method = "loess", se = FALSE, fullrange = TRUE) +
  labs(title = "s_diversion vs. game time (level by conference)",
       colour = "Conference") +
  theme_classic()+
  theme(plot.title = element_text(hjust = 0.5, face = "bold"))
`geom_smooth()` using formula = 'y ~ x'
Warning: Removed 73 rows containing missing values or values outside the scale range
(`geom_smooth()`).

Multilevel Modeling

Two Level

Year

year_model_1_null <- lmer(s_diversion ~ 1 + (1|year), data=data_clean)

summary(year_model_1_null)
Linear mixed model fit by REML. t-tests use Satterthwaite's method [
lmerModLmerTest]
Formula: s_diversion ~ 1 + (1 | year)
   Data: data_clean

REML criterion at convergence: 424.5

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-2.1594 -0.7537 -0.2390  0.8449  2.2797 

Random effects:
 Groups   Name        Variance Std.Dev.
 year     (Intercept) 0.01307  0.1143  
 Residual             0.07647  0.2765  
Number of obs: 1390, groups:  year, 22

Fixed effects:
            Estimate Std. Error       df t value Pr(>|t|)    
(Intercept)  0.37894    0.02623 18.52179   14.45 1.56e-11 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Random effects year (Intercept): 0.01307 There’s some variability in average s_diversion across years.

Fixed effects (Intercept): 0.37894 The overall average s_diversion is 0.37894 across all years.

library(performance)
icc(year_model_1_null)
# Intraclass Correlation Coefficient

    Adjusted ICC: 0.146
  Unadjusted ICC: 0.146
library("sjPlot")
plot_model(year_model_1_null, type ='re', facet.grid = FALSE, sort.est = "sort.all", y.offset = .4)
Sorting each group of random effects ('sort.all') is not possible when 'facets = TRUE'.

School

school_model_1_null <- lmer(s_diversion ~ 1 + (1|school), data=data_clean)

summary(school_model_1_null)
Linear mixed model fit by REML. t-tests use Satterthwaite's method [
lmerModLmerTest]
Formula: s_diversion ~ 1 + (1 | school)
   Data: data_clean

REML criterion at convergence: -1126.1

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-3.4297 -0.4986 -0.0120  0.5227  3.1356 

Random effects:
 Groups   Name        Variance Std.Dev.
 school   (Intercept) 0.06163  0.2482  
 Residual             0.02346  0.1532  
Number of obs: 1390, groups:  school, 31

Fixed effects:
            Estimate Std. Error       df t value Pr(>|t|)    
(Intercept)  0.37253    0.04489 30.08790   8.299 2.84e-09 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Random effects year (Intercept): 0.06163 There is variability in the average s_diversion across schools.

Fixed effects (Intercept): 0.37253 The overall average s_diversion is 0.37253, across all schools.

library("sjPlot")
plot_model(school_model_1_null, type ='re', facet.grid = FALSE, sort.est = "sort.all", y.offset = .4)
Sorting each group of random effects ('sort.all') is not possible when 'facets = TRUE'.

library(performance)
icc(school_model_1_null)
# Intraclass Correlation Coefficient

    Adjusted ICC: 0.724
  Unadjusted ICC: 0.724

Three Level

two_level_model_null <- lmer(s_diversion ~ 1+(1|school)+(1|school:year), data=data_clean)

summary(two_level_model_null)
Linear mixed model fit by REML. t-tests use Satterthwaite's method [
lmerModLmerTest]
Formula: s_diversion ~ 1 + (1 | school) + (1 | school:year)
   Data: data_clean

REML criterion at convergence: -2448.2

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-5.6732 -0.4055 -0.0140  0.3816  5.6324 

Random effects:
 Groups      Name        Variance Std.Dev.
 school:year (Intercept) 0.019235 0.13869 
 school      (Intercept) 0.058885 0.24266 
 Residual                0.005853 0.07651 
Number of obs: 1390, groups:  school:year, 214; school, 31

Fixed effects:
            Estimate Std. Error       df t value Pr(>|t|)    
(Intercept)  0.37444    0.04518 30.30701   8.288 2.76e-09 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Random effects year (Intercept): school:year 0.019235 school 0.058885 There is variance between schools, and the schools change over the years.

Fixed effects:0.37444 The overall average s_diversion is 0.37444, across all schools and years.

plot_model(two_level_model_null, type ='re', facet.grid = FALSE, sort.est = "sort.all", y.offset = .4)
Sorting each group of random effects ('sort.all') is not possible when 'facets = TRUE'.
Sorting each group of random effects ('sort.all') is not possible when 'facets = TRUE'.
[[1]]


[[2]]

library(performance)
icc(two_level_model_null)
# Intraclass Correlation Coefficient

    Adjusted ICC: 0.930
  Unadjusted ICC: 0.930
 frequency_table_school <- table(data_clean$school) %>% data.frame()
      frequency_table_school
                  Var1 Freq
1        Arizona State   33
2             Arkansas    6
3               Auburn   36
4              Clemson   28
5  Colorado University   59
6                 Duke   38
7              Florida   43
8              Georgia   36
9         Georgia Tech   33
10            Illinois   28
11                Iowa   16
12            Kentucky   34
13                 LSU   36
14            Maryland   29
15            Michigan  143
16      Michigan State   21
17           Minnesota   42
18            NC State   31
19          Ohio State  123
20            Oklahoma   23
21          Penn State   82
22              Purdue   55
23            Stanford   32
24           Tennessee   77
25                UCLA    7
26                 UNC  100
27             UoTexas   54
28                 USC   16
29          Washington   32
30    Washington State   42
31           Wisconsin   55
      ggplot(frequency_table_school, aes(x = Var1, y = Freq)) +
             geom_bar(stat = "identity", fill = "black") +
             labs(title = "Barplot for School Categories", x =
"Categories", y = "Frequency") +
       theme_classic()+
       theme(axis.text.x = element_text(angle = 90, hjust = 1, size = 8))

 table(data_clean$school,data_clean$year)
                     
                      2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013
  Arizona State          0    0    0    0    0    0    0    0    0    0    0
  Arkansas               0    0    0    0    0    0    0    0    0    0    0
  Auburn                 0    0    0    0    0    0    0    0    0    0    0
  Clemson                0    0    0    0    0    0    0    0    0    0    0
  Colorado University    0    0    0    0    0    0    6    6    5    6    6
  Duke                   0    0    0    0    0    0    0    0    0    0    0
  Florida                0    0    0    0    0    0    0    0    0    0    6
  Georgia                0    0    0    0    0    0    0    0    0    0    0
  Georgia Tech           0    0    0    0    0    0    0    0    0    0    0
  Illinois               0    0    0    0    0    0    0    0    0    0    0
  Iowa                   0    0    0    0    0    0    0    0    0    0    0
  Kentucky               0    0    0    0    0    0    0    0    0    0    0
  LSU                    0    0    0    8    7    8    6    0    0    0    0
  Maryland               0    0    0    0    0    0    0    0    0    0    0
  Michigan               0    6    7    7    8    7    8    7    8    6    7
  Michigan State         0    0    0    0    0    0    0    0    0    0    0
  Minnesota              0    0    0    0    0    0    0    0    0    0    0
  NC State               0    0    0    0    0    0    0    0    0    0    0
  Ohio State             0    0    0    0    7    7    7    8    7    7    6
  Oklahoma               0    0    0    0    0    0    0    0    0    6    6
  Penn State             0    0    0    0    0    0    8    7    7    7    7
  Purdue                 0    0    0    0    0    0    0    0    0    7    7
  Stanford               0    0    0    0    0    0    0    0    0    0    0
  Tennessee              0    0    0    7    7    7    8    1    1    7    0
  UCLA                   0    0    0    0    0    0    0    0    0    0    0
  UNC                    0    6    6    7    6    7    7    6    7    7    7
  UoTexas                0    0    0    0    0    0    0    0    0    0    0
  USC                    0    0    0    0    0    0    0    0    0    0    0
  Washington             7    6    6    6    7    0    0    0    0    0    0
  Washington State       0    0    0    0    0    0    0    0    0    0    5
  Wisconsin              0    0    0    0    0    0    0    0    0    7    7
                     
                      2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024
  Arizona State          0    7    6    7    6    7    0    0    0    0    0
  Arkansas               0    0    0    0    6    0    0    0    0    0    0
  Auburn                 0    7    8    7    7    7    0    0    0    0    0
  Clemson                0    0    7    7    7    7    0    0    0    0    0
  Colorado University    6    6    6    6    6    0    0    0    0    0    0
  Duke                   7    6    6    7    6    6    0    0    0    0    0
  Florida                6    7    5    6    7    6    0    0    0    0    0
  Georgia                0    7    6    6    7    7    3    0    0    0    0
  Georgia Tech           0    7    7    6    6    7    0    0    0    0    0
  Illinois               0    0    0    0    0    0    0    7    7    7    7
  Iowa                   0    0    7    6    3    0    0    0    0    0    0
  Kentucky               0    0    7    7    7    8    5    0    0    0    0
  LSU                    0    7    0    0    0    0    0    0    0    0    0
  Maryland               6    6    6    6    5    0    0    0    0    0    0
  Michigan               7    7    8    6    7    7    0    7    8    7    8
  Michigan State         0    0    7    7    7    0    0    0    0    0    0
  Minnesota              0    0    0    7    7    7    0    0    7    7    7
  NC State               0    6    6    6    7    6    0    0    0    0    0
  Ohio State             7    7    7    7    7    7    2    7    8    6    9
  Oklahoma               6    5    0    0    0    0    0    0    0    0    0
  Penn State             7    7    7    7    7    7    0    4    0    0    0
  Purdue                 7    7    7    6    7    7    0    0    0    0    0
  Stanford               0    7    6    6    6    7    0    0    0    0    0
  Tennessee              7    7    7    7    3    8    0    0    0    0    0
  UCLA                   0    0    0    0    7    0    0    0    0    0    0
  UNC                    6    7    6    7    5    3    0    0    0    0    0
  UoTexas                0    0    6    6    6    5    4    6    7    6    8
  USC                    0    7    0    0    0    6    3    0    0    0    0
  Washington             0    0    0    0    0    0    0    0    0    0    0
  Washington State       6    6    7    6    6    6    0    0    0    0    0
  Wisconsin              7    7    6    7    7    7    0    0    0    0    0
     df <- data.frame(table(data_clean$school,data_clean$year))
      ggplot(df, aes(x=Var2, y= Var1, fill=Freq)) +
     geom_tile(color="white", size = 0.25) +
     geom_text(aes(label = Freq)) +
     scale_fill_gradientn(colours = c("white", "gold", "darkorchid"),
values = scales::rescale(c(0,1,40)), limits=c(0, 40))
Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
ℹ Please use `linewidth` instead.

Two level Model

model_school <- lmer(
s_diversion ~ game_time_num_c_2 + attendance_cwc_school+ game_result + s_game_c + area_classification + tenure_year_c + total_revenues_cgm + (1|school),
data = data_clean
)
Warning: Some predictor variables are on very different scales: consider
rescaling
Warning: Some predictor variables are on very different scales: consider
rescaling
summary(model_school)
Linear mixed model fit by REML. t-tests use Satterthwaite's method [
lmerModLmerTest]
Formula: 
s_diversion ~ game_time_num_c_2 + attendance_cwc_school + game_result +  
    s_game_c + area_classification + tenure_year_c + total_revenues_cgm +  
    (1 | school)
   Data: data_clean

REML criterion at convergence: -1246

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-3.6647 -0.6181 -0.0455  0.5359  4.0526 

Random effects:
 Groups   Name        Variance Std.Dev.
 school   (Intercept) 0.06038  0.2457  
 Residual             0.01773  0.1332  
Number of obs: 1245, groups:  school, 27

Fixed effects:
                        Estimate Std. Error         df t value Pr(>|t|)    
(Intercept)            4.526e-01  1.432e-01  2.549e+01   3.161  0.00402 ** 
game_time_num_c_2     -1.036e-03  1.370e-03  1.215e+03  -0.756  0.44960    
attendance_cwc_school -2.476e-07  3.820e-07  1.214e+03  -0.648  0.51694    
game_result1           1.256e-03  9.010e-03  1.214e+03   0.139  0.88917    
s_game_c              -4.248e-04  1.971e-03  1.213e+03  -0.216  0.82938    
area_classification1  -1.644e-01  1.512e-01  2.506e+01  -1.087  0.28723    
tenure_year_c          1.385e-02  1.838e-03  1.233e+03   7.535 9.43e-14 ***
total_revenues_cgm     1.181e-09  2.053e-10  1.230e+03   5.751 1.12e-08 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Correlation of Fixed Effects:
            (Intr) g____2 attn__ gm_rs1 s_gm_c ar_cl1 tnr_y_
gm_tm_nm__2 -0.036                                          
attndnc_cw_ -0.009 -0.090                                   
game_reslt1 -0.042  0.038  0.046                            
s_game_c    -0.050  0.147 -0.070  0.167                     
ar_clssfct1 -0.939 -0.001  0.003 -0.009 -0.002              
tenure_yr_c -0.068 -0.043  0.182 -0.002  0.016  0.017       
ttl_rvns_cg  0.075 -0.010 -0.144 -0.057 -0.038 -0.030 -0.783
fit warnings:
Some predictor variables are on very different scales: consider rescaling
performance::icc(model_school)
# Intraclass Correlation Coefficient

    Adjusted ICC: 0.773
  Unadjusted ICC: 0.679
model_school_cat <- lmer(
s_diversion ~ game_time_chars_c_1 + attendance_cwc_school + game_result + s_game_c + area_classification + tenure_year_c + total_revenues_cgm + (1|school),
data = data_clean
)
Warning: Some predictor variables are on very different scales: consider
rescaling
Warning: Some predictor variables are on very different scales: consider
rescaling
summary(model_school_cat)
Linear mixed model fit by REML. t-tests use Satterthwaite's method [
lmerModLmerTest]
Formula: 
s_diversion ~ game_time_chars_c_1 + attendance_cwc_school + game_result +  
    s_game_c + area_classification + tenure_year_c + total_revenues_cgm +  
    (1 | school)
   Data: data_clean

REML criterion at convergence: -1240

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-3.6546 -0.6153 -0.0385  0.5464  4.1266 

Random effects:
 Groups   Name        Variance Std.Dev.
 school   (Intercept) 0.06029  0.2455  
 Residual             0.01771  0.1331  
Number of obs: 1245, groups:  school, 27

Fixed effects:
                        Estimate Std. Error         df t value Pr(>|t|)    
(Intercept)            4.714e-01  1.442e-01  2.635e+01   3.268  0.00301 ** 
game_time_chars_c_12  -1.974e-02  1.966e-02  1.216e+03  -1.004  0.31578    
game_time_chars_c_13  -3.331e-02  2.018e-02  1.217e+03  -1.651  0.09903 .  
game_time_chars_c_14  -1.890e-02  2.091e-02  1.217e+03  -0.904  0.36615    
attendance_cwc_school -2.385e-07  3.817e-07  1.212e+03  -0.625  0.53214    
game_result1           2.257e-03  9.026e-03  1.212e+03   0.250  0.80254    
s_game_c              -3.513e-04  1.967e-03  1.211e+03  -0.179  0.85829    
area_classification1  -1.652e-01  1.511e-01  2.507e+01  -1.093  0.28473    
tenure_year_c          1.373e-02  1.838e-03  1.231e+03   7.466 1.56e-13 ***
total_revenues_cgm     1.184e-09  2.053e-10  1.229e+03   5.767 1.02e-08 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Correlation of Fixed Effects:
            (Intr) g____12 g____13 g____14 attn__ gm_rs1 s_gm_c ar_cl1 tnr_y_
gm_tm_c__12 -0.128                                                           
gm_tm_c__13 -0.126  0.893                                                    
gm_tm_c__14 -0.127  0.864   0.859                                            
attndnc_cw_ -0.005 -0.028  -0.052  -0.063                                    
game_reslt1 -0.037 -0.036  -0.033   0.002   0.046                            
s_game_c    -0.049 -0.006   0.034   0.061  -0.068  0.169                     
ar_clssfct1 -0.933  0.012   0.010   0.012   0.002 -0.009 -0.001              
tenure_yr_c -0.065 -0.019  -0.013  -0.041   0.181 -0.004  0.017  0.016       
ttl_rvns_cg  0.074  0.011   0.000   0.001  -0.143 -0.057 -0.040 -0.030 -0.782
fit warnings:
Some predictor variables are on very different scales: consider rescaling
performance::icc(model_school_cat)
# Intraclass Correlation Coefficient

    Adjusted ICC: 0.773
  Unadjusted ICC: 0.679
model_year <- lmer(
s_diversion ~ game_time_num_c_2 + attendance_cwc_year + game_result + s_game_c + area_classification + total_revenues_cgm + (1|year),
data = data_clean
)
Warning: Some predictor variables are on very different scales: consider
rescaling
Warning: Some predictor variables are on very different scales: consider
rescaling
summary(model_year)
Linear mixed model fit by REML. t-tests use Satterthwaite's method [
lmerModLmerTest]
Formula: s_diversion ~ game_time_num_c_2 + attendance_cwc_year + game_result +  
    s_game_c + area_classification + total_revenues_cgm + (1 |      year)
   Data: data_clean

REML criterion at convergence: 445.6

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-2.0293 -0.7457 -0.3116  0.8336  2.2836 

Random effects:
 Groups   Name        Variance  Std.Dev.
 year     (Intercept) 0.0003515 0.01875 
 Residual             0.0768552 0.27723 
Number of obs: 1245, groups:  year, 20

Fixed effects:
                       Estimate Std. Error         df t value Pr(>|t|)    
(Intercept)           4.457e-01  3.393e-02  2.265e+02  13.136  < 2e-16 ***
game_time_num_c_2    -6.502e-03  2.637e-03  1.236e+03  -2.466  0.01381 *  
attendance_cwc_year  -1.152e-06  3.978e-07  2.185e+02  -2.895  0.00417 ** 
game_result1          3.258e-03  1.817e-02  1.238e+03   0.179  0.85774    
s_game_c             -2.251e-03  4.057e-03  1.227e+03  -0.555  0.57913    
area_classification1 -2.588e-02  2.890e-02  1.054e+03  -0.895  0.37084    
total_revenues_cgm    2.095e-09  2.101e-10  3.325e+01   9.967 1.62e-11 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Correlation of Fixed Effects:
            (Intr) g____2 attn__ gm_rs1 s_gm_c ar_cl1
gm_tm_nm__2 -0.283                                   
attndnc_cw_  0.222  0.014                            
game_reslt1 -0.370  0.025 -0.092                     
s_game_c    -0.431  0.130 -0.060  0.158              
ar_clssfct1 -0.732 -0.022 -0.246 -0.090 -0.007       
ttl_rvns_cg  0.093 -0.041 -0.550 -0.104 -0.023 -0.030
fit warnings:
Some predictor variables are on very different scales: consider rescaling
performance::icc(model_year)
# Intraclass Correlation Coefficient

    Adjusted ICC: 0.005
  Unadjusted ICC: 0.004
model_year_cat <- lmer(
s_diversion ~ game_time_chars_c_1 + attendance_cwc_year + game_result + s_game_c + area_classification + tenure_year_c + total_revenues_cgm + (1|year),
data = data_clean
)
Warning: Some predictor variables are on very different scales: consider
rescaling
Warning: Some predictor variables are on very different scales: consider
rescaling
summary(model_year_cat)
Linear mixed model fit by REML. t-tests use Satterthwaite's method [
lmerModLmerTest]
Formula: 
s_diversion ~ game_time_chars_c_1 + attendance_cwc_year + game_result +  
    s_game_c + area_classification + tenure_year_c + total_revenues_cgm +  
    (1 | year)
   Data: data_clean

REML criterion at convergence: 417.6

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-1.8337 -0.7852 -0.2846  0.7837  2.4683 

Random effects:
 Groups   Name        Variance  Std.Dev.
 year     (Intercept) 0.0001522 0.01234 
 Residual             0.0743757 0.27272 
Number of obs: 1245, groups:  year, 20

Fixed effects:
                       Estimate Std. Error         df t value Pr(>|t|)    
(Intercept)           3.913e-01  4.708e-02  4.925e+02   8.311 9.30e-16 ***
game_time_chars_c_12  6.882e-03  3.586e-02  1.221e+03   0.192  0.84784    
game_time_chars_c_13 -4.074e-02  3.642e-02  1.217e+03  -1.119  0.26351    
game_time_chars_c_14 -2.393e-02  3.736e-02  1.228e+03  -0.640  0.52197    
attendance_cwc_year  -1.008e-06  3.881e-07  4.114e+02  -2.598  0.00971 ** 
game_result1          1.304e-03  1.788e-02  1.234e+03   0.073  0.94186    
s_game_c             -1.828e-03  3.986e-03  1.231e+03  -0.459  0.64663    
area_classification1 -4.424e-02  2.872e-02  1.075e+03  -1.541  0.12368    
tenure_year_c         1.241e-02  1.850e-03  1.090e+03   6.705 3.23e-11 ***
total_revenues_cgm    1.575e-09  2.150e-10  1.140e+02   7.329 3.59e-11 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Correlation of Fixed Effects:
            (Intr) g____12 g____13 g____14 attn__ gm_rs1 s_gm_c ar_cl1 tnr_y_
gm_tm_c__12 -0.695                                                           
gm_tm_c__13 -0.678  0.867                                                    
gm_tm_c__14 -0.688  0.846   0.831                                            
attndnc_cw_  0.193 -0.050  -0.068  -0.045                                    
game_reslt1 -0.243 -0.025  -0.020   0.003  -0.092                            
s_game_c    -0.287 -0.025   0.017   0.039  -0.063  0.159                     
ar_clssfct1 -0.579  0.120   0.086   0.111  -0.248 -0.090 -0.009              
tenure_yr_c -0.047 -0.146  -0.130  -0.129   0.040 -0.012  0.010 -0.113       
ttl_rvns_cg  0.070  0.069   0.058   0.032  -0.514 -0.098 -0.028  0.010 -0.358
fit warnings:
Some predictor variables are on very different scales: consider rescaling
performance::icc(model_year_cat)
# Intraclass Correlation Coefficient

    Adjusted ICC: 0.002
  Unadjusted ICC: 0.002