── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr 1.1.4 ✔ purrr 1.0.4
✔ forcats 1.0.0 ✔ stringr 1.5.1
✔ ggplot2 3.5.2 ✔ tibble 3.2.1
✔ lubridate 1.9.4 ✔ tidyr 1.3.1
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag() masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(dplyr)library(lme4)
Loading required package: Matrix
Attaching package: 'Matrix'
The following objects are masked from 'package:tidyr':
expand, pack, unpack
library(lmerTest)
Warning: package 'lmerTest' was built under R version 4.5.2
Attaching package: 'lmerTest'
The following object is masked from 'package:lme4':
lmer
The following object is masked from 'package:stats':
step
Import the Data
Clean the data
data_clean <- data %>%select(`Conference`, `School`, `Area Classification (0-Rural; 1-Urban)`, `Year`, `Tenure Year`, `In-Season_Game`, `S_Diversion`, `Attendance`, `Game Time`,`Game result (Win=1; Loss=0)`,`Athletic Dept Profit`, `Athletic Dept Total Expenses`, `Athletic Dept Total Revenues`) # select the columndata_clean$GameTime_numeric <-as.numeric(format(data_clean$`Game Time`, "%H")) +as.numeric(format(data_clean$`Game Time`, "%M"))/60# convert game time to numerical variable and create a new columndata_clean$`Game Time`=format(data_clean$`Game Time`, format ="%H:%M") # avoid game time impacted by computer system datedata_clean <- data_clean %>%mutate(`Game Time`=as.character(`Game Time`)) %>%mutate(`Area Classification (0-Rural; 1-Urban)`=as.character(`Area Classification (0-Rural; 1-Urban)`)) %>%mutate(`Attendance`=as.numeric(`Attendance`)) # Convert the variable to its suitable data type.
Warning: There was 1 warning in `mutate()`.
ℹ In argument: `Attendance = as.numeric(Attendance)`.
Caused by warning:
! NAs introduced by coercion
data_clean <- data_clean %>%mutate(`Game result (Win=1; Loss=0)`=na_if(`Game result (Win=1; Loss=0)`, "N/A")) %>%mutate(across(all_of(cols_to_factor), as.factor)) # Make sure there is no NA level.
data_clean <-subset(data_clean, !is.na(`Game result (Win=1; Loss=0)`)) # Clean the game that is being cancelled.
data_clean$GameTime_numeric_c_1 <-with(data_clean, ifelse( GameTime_numeric >=9& GameTime_numeric <12, 1, # Morningifelse(GameTime_numeric >=12& GameTime_numeric <15.5, 2, # noonifelse(GameTime_numeric >=15.5& GameTime_numeric <19, 3, # afternoonifelse(GameTime_numeric >=19, 4, # eveningNA))) )) # Classify game time into four time slots.
data_clean$GameTime_numeric_c_2 <- data_clean$GameTime_numeric-12# centralize game time by 12
data_clean$`In-Season_Game_Centered`<-with(data_clean,ifelse(`In-Season_Game`==1, 0,ifelse(`In-Season_Game`==2, 1,ifelse(`In-Season_Game`==3, 2,ifelse(`In-Season_Game`==4, 3,ifelse(`In-Season_Game`==5, 4,ifelse(`In-Season_Game`==6, 5,ifelse(`In-Season_Game`==7, 6,ifelse(`In-Season_Game`==8, 7,ifelse(`In-Season_Game`==9, 8, NA)))))))))) # Centralize in the in-season game to 0.
data_clean <- data_clean %>% dplyr::rename(`conference`=`Conference`,`school`=`School`,`area_classification`=`Area Classification (0-Rural; 1-Urban)`,`year`=`Year`,`tenure_year`=`Tenure Year`,`s_game`=`In-Season_Game`,`s_diversion`=`S_Diversion`,`attendance`=`Attendance`,`game_time`=`Game Time`,`game_result`=`Game result (Win=1; Loss=0)`,`profit`=`Athletic Dept Profit`,`total_expenses`=`Athletic Dept Total Expenses`,`total_revenues`=`Athletic Dept Total Revenues`,`game_time_chars_c_1`=`GameTime_numeric_c_1`,`game_time_num_c_2`=`GameTime_numeric_c_2`,`s_game_c`=`In-Season_Game_Centered`,`tenure_year_c`=`Tenure Year Centered`,`year_c`=`Year_Centered` ) %>%select(-`GameTime_numeric`) # rename the columns.data_clean$game_time_chars_c_1 <-as.factor(data_clean$game_time_chars_c_1) # Change the variable into a factor variable.
data_clean %>%ggplot(mapping =aes(x = game_time_chars_c_1, y = s_diversion, colour =factor(conference))) +geom_point() +geom_smooth(mapping =aes(group = conference), method ="loess", se =FALSE, fullrange =TRUE) +labs(title ="s_diversion vs. game time (level by conference)",colour ="Conference") +theme_classic()+theme(plot.title =element_text(hjust =0.5, face ="bold"))
`geom_smooth()` using formula = 'y ~ x'
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: pseudoinverse used at 0.985
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: reciprocal condition number 7.9026e-16
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: There are other near singularities as well. 1
data_clean %>%ggplot(mapping =aes(x = game_time_num_c_2, y = s_diversion, colour =factor(conference))) +geom_point() +geom_smooth(mapping =aes(group = conference), method ="loess", se =FALSE, fullrange =TRUE) +labs(title ="s_diversion vs. game time (level by conference)",colour ="Conference") +theme_classic()+theme(plot.title =element_text(hjust =0.5, face ="bold"))
`geom_smooth()` using formula = 'y ~ x'
Warning: Removed 73 rows containing missing values or values outside the scale range
(`geom_smooth()`).
Linear mixed model fit by REML. t-tests use Satterthwaite's method [
lmerModLmerTest]
Formula: s_diversion ~ 1 + (1 | school) + (1 | school:year)
Data: data_clean
REML criterion at convergence: -2448.2
Scaled residuals:
Min 1Q Median 3Q Max
-5.6732 -0.4055 -0.0140 0.3816 5.6324
Random effects:
Groups Name Variance Std.Dev.
school:year (Intercept) 0.019235 0.13869
school (Intercept) 0.058885 0.24266
Residual 0.005853 0.07651
Number of obs: 1390, groups: school:year, 214; school, 31
Fixed effects:
Estimate Std. Error df t value Pr(>|t|)
(Intercept) 0.37444 0.04518 30.30701 8.288 2.76e-09 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Random effects year (Intercept): school:year 0.019235 school 0.058885 There is variance between schools, and the schools change over the years.
Fixed effects:0.37444 The overall average s_diversion is 0.37444, across all schools and years.
plot_model(two_level_model_null, type ='re', facet.grid =FALSE, sort.est ="sort.all", y.offset = .4)
Sorting each group of random effects ('sort.all') is not possible when 'facets = TRUE'.
Sorting each group of random effects ('sort.all') is not possible when 'facets = TRUE'.
Var1 Freq
1 Arizona State 33
2 Arkansas 6
3 Auburn 36
4 Clemson 28
5 Colorado University 59
6 Duke 38
7 Florida 43
8 Georgia 36
9 Georgia Tech 33
10 Illinois 28
11 Iowa 16
12 Kentucky 34
13 LSU 36
14 Maryland 29
15 Michigan 143
16 Michigan State 21
17 Minnesota 42
18 NC State 31
19 Ohio State 123
20 Oklahoma 23
21 Penn State 82
22 Purdue 55
23 Stanford 32
24 Tennessee 77
25 UCLA 7
26 UNC 100
27 UoTexas 54
28 USC 16
29 Washington 32
30 Washington State 42
31 Wisconsin 55
ggplot(frequency_table_school, aes(x = Var1, y = Freq)) +geom_bar(stat ="identity", fill ="black") +labs(title ="Barplot for School Categories", x ="Categories", y ="Frequency") +theme_classic()+theme(axis.text.x =element_text(angle =90, hjust =1, size =8))
Warning: Some predictor variables are on very different scales: consider
rescaling
Warning: Some predictor variables are on very different scales: consider
rescaling
summary(model_school)
Linear mixed model fit by REML. t-tests use Satterthwaite's method [
lmerModLmerTest]
Formula:
s_diversion ~ game_time_num_c_2 + attendance_cwc_school + game_result +
s_game_c + area_classification + tenure_year_c + total_revenues_cgm +
(1 | school)
Data: data_clean
REML criterion at convergence: -1246
Scaled residuals:
Min 1Q Median 3Q Max
-3.6647 -0.6181 -0.0455 0.5359 4.0526
Random effects:
Groups Name Variance Std.Dev.
school (Intercept) 0.06038 0.2457
Residual 0.01773 0.1332
Number of obs: 1245, groups: school, 27
Fixed effects:
Estimate Std. Error df t value Pr(>|t|)
(Intercept) 4.526e-01 1.432e-01 2.549e+01 3.161 0.00402 **
game_time_num_c_2 -1.036e-03 1.370e-03 1.215e+03 -0.756 0.44960
attendance_cwc_school -2.476e-07 3.820e-07 1.214e+03 -0.648 0.51694
game_result1 1.256e-03 9.010e-03 1.214e+03 0.139 0.88917
s_game_c -4.248e-04 1.971e-03 1.213e+03 -0.216 0.82938
area_classification1 -1.644e-01 1.512e-01 2.506e+01 -1.087 0.28723
tenure_year_c 1.385e-02 1.838e-03 1.233e+03 7.535 9.43e-14 ***
total_revenues_cgm 1.181e-09 2.053e-10 1.230e+03 5.751 1.12e-08 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Correlation of Fixed Effects:
(Intr) g____2 attn__ gm_rs1 s_gm_c ar_cl1 tnr_y_
gm_tm_nm__2 -0.036
attndnc_cw_ -0.009 -0.090
game_reslt1 -0.042 0.038 0.046
s_game_c -0.050 0.147 -0.070 0.167
ar_clssfct1 -0.939 -0.001 0.003 -0.009 -0.002
tenure_yr_c -0.068 -0.043 0.182 -0.002 0.016 0.017
ttl_rvns_cg 0.075 -0.010 -0.144 -0.057 -0.038 -0.030 -0.783
fit warnings:
Some predictor variables are on very different scales: consider rescaling
Warning: Some predictor variables are on very different scales: consider
rescaling
Warning: Some predictor variables are on very different scales: consider
rescaling
summary(model_school_cat)
Linear mixed model fit by REML. t-tests use Satterthwaite's method [
lmerModLmerTest]
Formula:
s_diversion ~ game_time_chars_c_1 + attendance_cwc_school + game_result +
s_game_c + area_classification + tenure_year_c + total_revenues_cgm +
(1 | school)
Data: data_clean
REML criterion at convergence: -1240
Scaled residuals:
Min 1Q Median 3Q Max
-3.6546 -0.6153 -0.0385 0.5464 4.1266
Random effects:
Groups Name Variance Std.Dev.
school (Intercept) 0.06029 0.2455
Residual 0.01771 0.1331
Number of obs: 1245, groups: school, 27
Fixed effects:
Estimate Std. Error df t value Pr(>|t|)
(Intercept) 4.714e-01 1.442e-01 2.635e+01 3.268 0.00301 **
game_time_chars_c_12 -1.974e-02 1.966e-02 1.216e+03 -1.004 0.31578
game_time_chars_c_13 -3.331e-02 2.018e-02 1.217e+03 -1.651 0.09903 .
game_time_chars_c_14 -1.890e-02 2.091e-02 1.217e+03 -0.904 0.36615
attendance_cwc_school -2.385e-07 3.817e-07 1.212e+03 -0.625 0.53214
game_result1 2.257e-03 9.026e-03 1.212e+03 0.250 0.80254
s_game_c -3.513e-04 1.967e-03 1.211e+03 -0.179 0.85829
area_classification1 -1.652e-01 1.511e-01 2.507e+01 -1.093 0.28473
tenure_year_c 1.373e-02 1.838e-03 1.231e+03 7.466 1.56e-13 ***
total_revenues_cgm 1.184e-09 2.053e-10 1.229e+03 5.767 1.02e-08 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Correlation of Fixed Effects:
(Intr) g____12 g____13 g____14 attn__ gm_rs1 s_gm_c ar_cl1 tnr_y_
gm_tm_c__12 -0.128
gm_tm_c__13 -0.126 0.893
gm_tm_c__14 -0.127 0.864 0.859
attndnc_cw_ -0.005 -0.028 -0.052 -0.063
game_reslt1 -0.037 -0.036 -0.033 0.002 0.046
s_game_c -0.049 -0.006 0.034 0.061 -0.068 0.169
ar_clssfct1 -0.933 0.012 0.010 0.012 0.002 -0.009 -0.001
tenure_yr_c -0.065 -0.019 -0.013 -0.041 0.181 -0.004 0.017 0.016
ttl_rvns_cg 0.074 0.011 0.000 0.001 -0.143 -0.057 -0.040 -0.030 -0.782
fit warnings:
Some predictor variables are on very different scales: consider rescaling
Warning: Some predictor variables are on very different scales: consider
rescaling
Warning: Some predictor variables are on very different scales: consider
rescaling
summary(model_year)
Linear mixed model fit by REML. t-tests use Satterthwaite's method [
lmerModLmerTest]
Formula: s_diversion ~ game_time_num_c_2 + attendance_cwc_year + game_result +
s_game_c + area_classification + total_revenues_cgm + (1 | year)
Data: data_clean
REML criterion at convergence: 445.6
Scaled residuals:
Min 1Q Median 3Q Max
-2.0293 -0.7457 -0.3116 0.8336 2.2836
Random effects:
Groups Name Variance Std.Dev.
year (Intercept) 0.0003515 0.01875
Residual 0.0768552 0.27723
Number of obs: 1245, groups: year, 20
Fixed effects:
Estimate Std. Error df t value Pr(>|t|)
(Intercept) 4.457e-01 3.393e-02 2.265e+02 13.136 < 2e-16 ***
game_time_num_c_2 -6.502e-03 2.637e-03 1.236e+03 -2.466 0.01381 *
attendance_cwc_year -1.152e-06 3.978e-07 2.185e+02 -2.895 0.00417 **
game_result1 3.258e-03 1.817e-02 1.238e+03 0.179 0.85774
s_game_c -2.251e-03 4.057e-03 1.227e+03 -0.555 0.57913
area_classification1 -2.588e-02 2.890e-02 1.054e+03 -0.895 0.37084
total_revenues_cgm 2.095e-09 2.101e-10 3.325e+01 9.967 1.62e-11 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Correlation of Fixed Effects:
(Intr) g____2 attn__ gm_rs1 s_gm_c ar_cl1
gm_tm_nm__2 -0.283
attndnc_cw_ 0.222 0.014
game_reslt1 -0.370 0.025 -0.092
s_game_c -0.431 0.130 -0.060 0.158
ar_clssfct1 -0.732 -0.022 -0.246 -0.090 -0.007
ttl_rvns_cg 0.093 -0.041 -0.550 -0.104 -0.023 -0.030
fit warnings:
Some predictor variables are on very different scales: consider rescaling
Warning: Some predictor variables are on very different scales: consider
rescaling
Warning: Some predictor variables are on very different scales: consider
rescaling
summary(model_year_cat)
Linear mixed model fit by REML. t-tests use Satterthwaite's method [
lmerModLmerTest]
Formula:
s_diversion ~ game_time_chars_c_1 + attendance_cwc_year + game_result +
s_game_c + area_classification + tenure_year_c + total_revenues_cgm +
(1 | year)
Data: data_clean
REML criterion at convergence: 417.6
Scaled residuals:
Min 1Q Median 3Q Max
-1.8337 -0.7852 -0.2846 0.7837 2.4683
Random effects:
Groups Name Variance Std.Dev.
year (Intercept) 0.0001522 0.01234
Residual 0.0743757 0.27272
Number of obs: 1245, groups: year, 20
Fixed effects:
Estimate Std. Error df t value Pr(>|t|)
(Intercept) 3.913e-01 4.708e-02 4.925e+02 8.311 9.30e-16 ***
game_time_chars_c_12 6.882e-03 3.586e-02 1.221e+03 0.192 0.84784
game_time_chars_c_13 -4.074e-02 3.642e-02 1.217e+03 -1.119 0.26351
game_time_chars_c_14 -2.393e-02 3.736e-02 1.228e+03 -0.640 0.52197
attendance_cwc_year -1.008e-06 3.881e-07 4.114e+02 -2.598 0.00971 **
game_result1 1.304e-03 1.788e-02 1.234e+03 0.073 0.94186
s_game_c -1.828e-03 3.986e-03 1.231e+03 -0.459 0.64663
area_classification1 -4.424e-02 2.872e-02 1.075e+03 -1.541 0.12368
tenure_year_c 1.241e-02 1.850e-03 1.090e+03 6.705 3.23e-11 ***
total_revenues_cgm 1.575e-09 2.150e-10 1.140e+02 7.329 3.59e-11 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Correlation of Fixed Effects:
(Intr) g____12 g____13 g____14 attn__ gm_rs1 s_gm_c ar_cl1 tnr_y_
gm_tm_c__12 -0.695
gm_tm_c__13 -0.678 0.867
gm_tm_c__14 -0.688 0.846 0.831
attndnc_cw_ 0.193 -0.050 -0.068 -0.045
game_reslt1 -0.243 -0.025 -0.020 0.003 -0.092
s_game_c -0.287 -0.025 0.017 0.039 -0.063 0.159
ar_clssfct1 -0.579 0.120 0.086 0.111 -0.248 -0.090 -0.009
tenure_yr_c -0.047 -0.146 -0.130 -0.129 0.040 -0.012 0.010 -0.113
ttl_rvns_cg 0.070 0.069 0.058 0.032 -0.514 -0.098 -0.028 0.010 -0.358
fit warnings:
Some predictor variables are on very different scales: consider rescaling