Stadium_Waste_Analysis_report_Jingyi_Yang

Author

Jingyi Yang

Install Packages

library(readxl)
library("readr")
library("tidyverse")

Warning: package 'ggplot2' was built under R version 4.5.2

── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ purrr     1.0.4
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   4.0.1     ✔ tibble    3.2.1
✔ lubridate 1.9.4     ✔ tidyr     1.3.1
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(dplyr)
library(lme4)

Loading required package: Matrix

Attaching package: 'Matrix'

The following objects are masked from 'package:tidyr':

    expand, pack, unpack

library(lmerTest)

Warning: package 'lmerTest' was built under R version 4.5.2


Attaching package: 'lmerTest'

The following object is masked from 'package:lme4':

    lmer

The following object is masked from 'package:stats':

    step

library("effectsize")

Warning: package 'effectsize' was built under R version 4.5.2

Import the Data

Clean the data

# Select the columns
data_clean <- data %>% select(`Conference`, `School`, `Area Classification (0-Rural; 1-Urban)`, `Year`, `Tenure Year`, `In-Season_Game`, `S_Diversion`, `Attendance`, `Game Time`,`Game result (Win=1; Loss=0)`,`Athletic Dept Profit`, `Athletic Dept Total Expenses`, `Athletic Dept Total Revenues`) 
# Convert game time to a numerical variable and create a new column
data_clean$GameTime_numeric <- as.numeric(format(data_clean$`Game Time`, "%H")) + as.numeric(format(data_clean$`Game Time`, "%M"))/60 
# Avoid game time impacted by the computer system date
data_clean$`Game Time`=format(data_clean$`Game Time`, format = "%H:%M") 
# Convert the categorical variables to the factor data type
data_clean <- data_clean %>% mutate(`Game Time`= as.character(`Game Time`)) %>% mutate(`Area Classification (0-Rural; 1-Urban)`= as.character(`Area Classification (0-Rural; 1-Urban)`)) %>% mutate(`Attendance`= as.numeric(`Attendance`))

Warning: There was 1 warning in `mutate()`.
ℹ In argument: `Attendance = as.numeric(Attendance)`.
Caused by warning:
! NAs introduced by coercion

# Convert the categorical variables to the factor data type
cols_to_factor <- data_clean%>% select_if(is.character) %>% colnames() 
cols_to_factor

[1] "Conference"                            
[2] "School"                                
[3] "Area Classification (0-Rural; 1-Urban)"
[4] "Game Time"                             
[5] "Game result (Win=1; Loss=0)"

# Make sure there is no NA level 
data_clean <- data_clean %>% 
  mutate(`Game result (Win=1; Loss=0)` = na_if(`Game result (Win=1; Loss=0)`, "N/A")) %>%
          mutate(across(all_of(cols_to_factor), as.factor))

# Clean the game that is being cancelled
data_clean <- subset(data_clean, !is.na(`Game result (Win=1; Loss=0)`))

# Classify game time into four time slots
data_clean$GameTime_numeric_c_1 <- with(data_clean, ifelse(
  GameTime_numeric >= 9 &  GameTime_numeric < 12, 1,  # Morning
  ifelse(GameTime_numeric >= 12 & GameTime_numeric < 15.5, 2,   # Noon
  ifelse(GameTime_numeric >= 15.5 & GameTime_numeric < 19, 3,   # Afternoon
  ifelse(GameTime_numeric >= 19,  4,   # Evening
  NA)))                                
))

# Centralize game time by 12
data_clean$GameTime_numeric_c_2 <-  data_clean$GameTime_numeric-12

# Centralize in the in-season game to 0
data_clean$`In-Season_Game_Centered` <- with(data_clean,
  ifelse(`In-Season_Game` == 1, 0,
  ifelse(`In-Season_Game` == 2, 1,
  ifelse(`In-Season_Game` == 3, 2,
  ifelse(`In-Season_Game` == 4, 3,
  ifelse(`In-Season_Game` == 5, 4,
  ifelse(`In-Season_Game` == 6, 5,
  ifelse(`In-Season_Game` == 7, 6,
  ifelse(`In-Season_Game` == 8, 7,
  ifelse(`In-Season_Game` == 9, 8, NA)))))))))
)

# Centralize the tenure year to 0
data_clean$`Tenure Year Centered` <- with(data_clean,
  ifelse(`Tenure Year` == 1, 0,
  ifelse(`Tenure Year` == 2, 1,
  ifelse(`Tenure Year` == 3, 2,
  ifelse(`Tenure Year` == 4, 3,
  ifelse(`Tenure Year` == 5, 4,
  ifelse(`Tenure Year` == 6, 5,
  ifelse(`Tenure Year` == 7, 6,
  ifelse(`Tenure Year` == 8, 7,
  ifelse(`Tenure Year` == 9, 8,
  ifelse(`Tenure Year` == 10, 9,
  ifelse(`Tenure Year` == 11, 10,
  ifelse(`Tenure Year` == 12, 11,
  ifelse(`Tenure Year` == 13, 12,
  ifelse(`Tenure Year` == 14, 13,
  ifelse(`Tenure Year` == 15, 14,
  ifelse(`Tenure Year` == 16, 15,
  ifelse(`Tenure Year` == 17, 16,
  ifelse(`Tenure Year` == 18, 17,
  ifelse(`Tenure Year` == 19, 18,
  ifelse(`Tenure Year` == 20, 19,
         NA))))))))))))))))))))
)

 # Centralize the year to 0
data_clean$Year_Centered <- with(data_clean,
  ifelse(`Year` == 2003, 0,
  ifelse(`Year` == 2004, 1,
  ifelse(`Year` == 2005, 2,
  ifelse(`Year` == 2006, 3,
  ifelse(`Year` == 2007, 4,
  ifelse(`Year` == 2008, 5,
  ifelse(`Year` == 2009, 6,
  ifelse(`Year` == 2010, 7,
  ifelse(`Year` == 2011, 8,
  ifelse(`Year` == 2012, 9,
  ifelse(`Year` == 2013, 10,
  ifelse(`Year` == 2014, 11,
  ifelse(`Year` == 2015, 12,
  ifelse(`Year` == 2016, 13,
  ifelse(`Year` == 2017, 14,
  ifelse(`Year` == 2018, 15,
  ifelse(`Year` == 2019, 16,
  ifelse(`Year` == 2020, 17,
  ifelse(`Year` == 2021, 18,
  ifelse(`Year` == 2022, 19,
  ifelse(`Year` == 2023, 20,
  ifelse(`Year` == 2024, 21,
         NA))))))))))))))))))))))
)

# Rename the columns
data_clean <- data_clean %>% dplyr::rename(`conference`= `Conference`,
                                    `school`= `School`,
                                    `area_classification` = `Area Classification (0-Rural; 1-Urban)`,
                                    `year`= `Year`,
                                    `tenure_year` = `Tenure Year`,
                                    `s_game`= `In-Season_Game`,
                                    `s_diversion`= `S_Diversion`,
                                    `attendance`= `Attendance`,
                                    `game_time`= `Game Time`,
                                    `game_result`= `Game result (Win=1; Loss=0)`,
                                    `profit`= `Athletic Dept Profit`,
                                    `total_expenses`= `Athletic Dept Total Expenses`,
                                    `total_revenues`= `Athletic Dept Total Revenues`,
                                    `game_time_chars_c_1`= `GameTime_numeric_c_1`,
                                    `game_time_num_c_2`= `GameTime_numeric_c_2`,
                                    `s_game_c`= `In-Season_Game_Centered`,
                                    `tenure_year_c`= `Tenure Year Centered`,
                                    `year_c`= `Year_Centered`
                                    ) %>% select(- `GameTime_numeric`) 

# Change the variable into a factor variable
data_clean$game_time_chars_c_1 <-as.factor(data_clean$game_time_chars_c_1)

# The variable Attendance belongs to the game level (level 1). Use the Centering Within cluster (CWC) method to centralize the attendance variable, making each cluster have a mean of 0.

data_clean %<>%
  group_by(school) %>%
  mutate(attendance_mean_school = mean(attendance, na.rm = TRUE)) %>%
  mutate(attendance_cwc_school = attendance - attendance_mean_school) %>%
  ungroup()

# Rescale the attendance to make the variable have a mean of 0 and a standard deviation of 1 to solve the issue of different scales.

data_clean %<>%
  group_by(school) %>%
  mutate(attendance_mean_school = mean(attendance, na.rm = TRUE)) %>%
  mutate(attendance_school_z = (attendance - attendance_mean_school)/sd(attendance, na.rm = TRUE)) %>% 
  ungroup()

# The variable Attendance belongs to the game level (level 1). Use the Centering Within cluster (CWC) method to centralize the attendance variable, making each cluster have a mean of 0.

data_clean %<>%
  group_by(year) %>%
  mutate(attendance_mean_year = mean(attendance, na.rm = TRUE)) %>%
  mutate(attendance_cwc_year = attendance - attendance_mean_year) %>%
  ungroup()

# Rescale the attendance to make the variable have a mean of 0 and a standard deviation of 1 to solve the issue of different scales.

data_clean %<>%
  group_by(year) %>%
  mutate(attendance_mean_year = mean(attendance, na.rm = TRUE)) %>%
  mutate(attendance_year_z = (attendance - attendance_mean_year)/sd(attendance, na.rm = TRUE)) %>%
  ungroup()

# The variable total revenues belongs to the year level (level 2). Use the Centering Grand Mean (CGM) method to centralize the total revenues variable, making all individuals have a mean of 0.

data_clean %<>%
mutate(total_revenues_mean = mean(total_revenues, na.rm = TRUE)) %>%
mutate(total_revenues_cgm = total_revenues - total_revenues_mean)

# Rescale the attendance to make the variable have a mean of 0 and a standard deviation of 1 to solve the issue of different scales.

data_clean %<>%
mutate(total_revenues_mean = mean(total_revenues, na.rm = TRUE)) %>%
mutate(total_revenues_z = (total_revenues - total_revenues_mean)/sd(total_revenues,na.rm = TRUE))

str(data_clean)

tibble [1,390 × 27] (S3: tbl_df/tbl/data.frame)
 $ conference            : Factor w/ 5 levels "ACC","Big10",..: 4 4 4 4 4 4 4 4 4 4 ...
 $ school                : Factor w/ 31 levels "Arizona State",..: 1 1 1 1 1 1 1 1 1 1 ...
 $ area_classification   : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
 $ year                  : num [1:1390] 2015 2015 2015 2015 2015 ...
 $ tenure_year           : num [1:1390] 1 1 1 1 1 1 1 2 2 2 ...
 $ s_game                : num [1:1390] 1 2 3 4 5 6 7 1 2 3 ...
 $ s_diversion           : num [1:1390] 0.44 0.412 0.315 0.57 0.579 ...
 $ attendance            : num [1:1390] 46500 43310 61904 44157 56534 ...
 $ game_time             : Factor w/ 54 levels "09:00","10:00",..: 49 44 46 44 46 13 15 49 44 44 ...
 $ game_result           : Factor w/ 2 levels "0","1": 2 2 1 2 1 2 2 2 2 2 ...
 $ profit                : num [1:1390] 566524 566524 566524 566524 566524 ...
 $ total_expenses        : num [1:1390] 83873516 83873516 83873516 83873516 83873516 ...
 $ total_revenues        : num [1:1390] 84440040 84440040 84440040 84440040 84440040 ...
 $ game_time_chars_c_1   : Factor w/ 4 levels "1","2","3","4": 4 4 4 4 4 2 2 4 4 4 ...
 $ game_time_num_c_2     : num [1:1390] 8 7 7.5 7 7.5 1 1.5 8 7 7 ...
 $ s_game_c              : num [1:1390] 0 1 2 3 4 5 6 0 1 2 ...
 $ tenure_year_c         : num [1:1390] 0 0 0 0 0 0 0 1 1 1 ...
 $ year_c                : num [1:1390] 12 12 12 12 12 12 12 13 13 13 ...
 $ attendance_mean_school: num [1:1390] 50009 50009 50009 50009 50009 ...
 $ attendance_cwc_school : num [1:1390] -3509 -6699 11895 -5852 6525 ...
 $ attendance_school_z   : num [1:1390] -0.65 -1.24 2.2 -1.08 1.21 ...
 $ attendance_mean_year  : num [1:1390] 70003 70003 70003 70003 70003 ...
 $ attendance_cwc_year   : num [1:1390] -23503 -26693 -8099 -25846 -13469 ...
 $ attendance_year_z     : num [1:1390] -0.856 -0.972 -0.295 -0.941 -0.49 ...
 $ total_revenues_mean   : num [1:1390] 1.25e+08 1.25e+08 1.25e+08 1.25e+08 1.25e+08 ...
 $ total_revenues_cgm    : num [1:1390] -40621158 -40621158 -40621158 -40621158 -40621158 ...
 $ total_revenues_z      : num [1:1390] -0.833 -0.833 -0.833 -0.833 -0.833 ...

Two level Model

Random Slope Models

The model that treats tenure years as a random slope performs better.

The ICC increases noticeably in the tenure-year model (model_school_r_2).
In attendance model (model_school_r_1), the random slope variance for attendance is extremely small (0.000872) and essentially zero correlation with the intercept. However, in tenure-year model (model_school_r_2), the random slope variance for tenure year is measurable (0.001163) with a correlation –0.20.
The Residual SD drops from 0.13007 in attendance model (model_school_r_1) to 0.1105 in tenure-year model (model_school_r_2).

Random Slopes for Attendance

model_school_r_1 <- lmer(
s_diversion ~ game_time_num_c_2 + attendance_school_z+ game_result + s_game_c + area_classification + tenure_year_c + total_revenues_z +conference + (1+attendance_school_z|school),
data = data_clean
)

summary(model_school_r_1)

Linear mixed model fit by REML. t-tests use Satterthwaite's method [
lmerModLmerTest]
Formula: s_diversion ~ game_time_num_c_2 + attendance_school_z + game_result +  
    s_game_c + area_classification + tenure_year_c + total_revenues_z +  
    conference + (1 + attendance_school_z | school)
   Data: data_clean

REML criterion at convergence: -1323.6

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-3.7533 -0.6092 -0.0483  0.5221  3.6153 

Random effects:
 Groups   Name                Variance Std.Dev. Corr
 school   (Intercept)         0.062837 0.25067      
          attendance_school_z 0.000872 0.02953  0.00
 Residual                     0.016917 0.13007      
Number of obs: 1245, groups:  school, 27

Fixed effects:
                       Estimate Std. Error         df t value Pr(>|t|)    
(Intercept)           5.718e-01  2.075e-01  2.122e+01   2.755   0.0118 *  
game_time_num_c_2    -7.037e-04  1.357e-03  1.207e+03  -0.519   0.6042    
attendance_school_z  -3.909e-03  7.172e-03  2.651e+01  -0.545   0.5903    
game_result1         -6.826e-04  8.872e-03  1.203e+03  -0.077   0.9387    
s_game_c             -3.757e-04  1.938e-03  1.204e+03  -0.194   0.8463    
area_classification1 -1.374e-01  1.642e-01  2.098e+01  -0.836   0.4124    
tenure_year_c         1.456e-02  1.846e-03  1.232e+03   7.884 6.92e-15 ***
total_revenues_z      5.817e-02  1.005e-02  1.232e+03   5.789 8.97e-09 ***
conferenceBig10      -1.397e-01  1.526e-01  2.103e+01  -0.915   0.3704    
conferenceBig12      -2.881e-01  2.185e-01  2.115e+01  -1.319   0.2014    
conferencePac12      -8.186e-02  1.830e-01  2.112e+01  -0.447   0.6592    
conferenceSEC        -2.351e-01  1.581e-01  2.114e+01  -1.487   0.1518    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Correlation of Fixed Effects:
            (Intr) g____2 attn__ gm_rs1 s_gm_c ar_cl1 tnr_y_ ttl_r_ cnfB10
gm_tm_nm__2 -0.024                                                        
attndnc_sc_ -0.004 -0.076                                                 
game_reslt1 -0.032  0.029  0.052                                          
s_game_c    -0.035  0.144 -0.023  0.169                                   
ar_clssfct1 -0.792  0.001  0.000 -0.006 -0.002                            
tenure_yr_c -0.055 -0.045  0.086  0.001  0.026 -0.001                     
totl_rvns_z  0.071 -0.002 -0.066 -0.058 -0.047 -0.014 -0.781              
confrncBg10 -0.673  0.004  0.003  0.003  0.001  0.216  0.033 -0.050       
confrncBg12 -0.354 -0.004  0.004  0.002  0.005  0.001  0.050 -0.070  0.478
confrncPc12 -0.594 -0.009 -0.002  0.006  0.000  0.226 -0.030  0.020  0.614
conferncSEC -0.486 -0.009  0.005  0.003  0.000  0.001  0.051 -0.060  0.659
            cnfB12 cnfP12
gm_tm_nm__2              
attndnc_sc_              
game_reslt1              
s_game_c                 
ar_clssfct1              
tenure_yr_c              
totl_rvns_z              
confrncBg10              
confrncBg12              
confrncPc12  0.394       
conferncSEC  0.462  0.546

performance::icc(model_school_r_1)

Warning: Random slopes not present as fixed effects. This artificially inflates
  the conditional random effect variances.
  Respecify the fixed effects structure of your model (add random slopes
  as fixed effects).

# Intraclass Correlation Coefficient

    Adjusted ICC: 0.790
  Unadjusted ICC: 0.668

plot(model_school_r_1, type=c("p","smooth"))

There is no strong curvature. The linearity assumption is mostly reasonable.
The variability of the residuals seems to be non-constant (along the horizontal line at 0), suggesting a violation of the equal variance assumption.

qqnorm(resid(model_school_r_1)); qqline(resid(model_school_r_1))

The QQ plot indicates that the distribution of the residuals appears to have heavier tails than the normal distribution, though it look symmetical.

qqnorm(ranef(model_school_r_1)$school[, "(Intercept)"]);qqline(ranef(model_school_r_1)$school[, "(Intercept)"])

The random intercept roughly follows the reference line, so the random intercepts appear approximately normally distributed.

mf <- model.frame(model_school_r_1)
res <- residuals(model_school_r_1)
par(mfrow=c(2,3))
plot(res ~ mf$game_time_num_c_2,
main = "Residuals vs Game Time (centered)",
xlab = "Game Time (centered)", ylab = "Residuals")
plot(res ~ mf$attendance_school_z,
main = "Residuals vs Attendance (Z-scoring)",
xlab = "Attendance (Z-scoring)", ylab = "Residuals")
plot(res ~ mf$total_revenues_z,
main = "Residuals vs Total Revenues",
xlab = "Total Revenues (Z-scoring)", ylab = "Residuals")
plot(res ~ mf$s_game_c,
main = "Residuals vs Season Game (centered)",
xlab = "Season Game (centered)", ylab = "Residuals")
plot(res ~ mf$tenure_year_c,
main = "Residuals vs Tenure (centered)",
xlab = "Tenure (centered)", ylab = "Residuals")
plot(res ~ mf$area_classification,
main = "Residuals vs Area (0/1)",
xlab = "Area (0/1)", ylab = "Residuals")

plot(res ~ mf$conference,
main = "Residuals vs Conference",
xlab = "Conference", ylab = "Residuals")

Random Slopes for tenure year

model_school_r_2 <- lmer(
s_diversion ~ game_time_num_c_2 + attendance_school_z+ game_result + s_game_c + area_classification + tenure_year_c + total_revenues_z +conference + (1+tenure_year_c|school),
data = data_clean
)

summary(model_school_r_2)

Linear mixed model fit by REML. t-tests use Satterthwaite's method [
lmerModLmerTest]
Formula: s_diversion ~ game_time_num_c_2 + attendance_school_z + game_result +  
    s_game_c + area_classification + tenure_year_c + total_revenues_z +  
    conference + (1 + tenure_year_c | school)
   Data: data_clean

REML criterion at convergence: -1684

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-4.3705 -0.5636 -0.0676  0.4748  3.9509 

Random effects:
 Groups   Name          Variance Std.Dev. Corr 
 school   (Intercept)   0.058179 0.2412        
          tenure_year_c 0.001163 0.0341   -0.20
 Residual               0.012213 0.1105        
Number of obs: 1245, groups:  school, 27

Fixed effects:
                       Estimate Std. Error         df t value Pr(>|t|)  
(Intercept)           5.225e-01  1.965e-01  2.087e+01   2.659   0.0147 *
game_time_num_c_2    -3.605e-04  1.150e-03  1.196e+03  -0.313   0.7540  
attendance_school_z   2.736e-03  3.417e-03  1.209e+03   0.801   0.4234  
game_result1          8.230e-03  7.594e-03  1.196e+03   1.084   0.2787  
s_game_c             -5.605e-04  1.637e-03  1.191e+03  -0.342   0.7321  
area_classification1 -7.620e-02  1.557e-01  2.072e+01  -0.490   0.6296  
tenure_year_c         7.593e-03  7.401e-03  2.509e+01   1.026   0.3148  
total_revenues_z      2.377e-02  9.252e-03  1.221e+03   2.569   0.0103 *
conferenceBig10      -1.402e-01  1.439e-01  2.038e+01  -0.974   0.3415  
conferenceBig12      -2.587e-01  2.064e-01  2.067e+01  -1.253   0.2241  
conferencePac12      -2.896e-03  1.775e-01  2.245e+01  -0.016   0.9871  
conferenceSEC        -2.301e-01  1.492e-01  2.049e+01  -1.543   0.1382  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Correlation of Fixed Effects:
            (Intr) g____2 attn__ gm_rs1 s_gm_c ar_cl1 tnr_y_ ttl_r_ cnfB10
gm_tm_nm__2 -0.021                                                        
attndnc_sc_ -0.010 -0.116                                                 
game_reslt1 -0.032  0.040  0.097                                          
s_game_c    -0.032  0.148 -0.046  0.169                                   
ar_clssfct1 -0.792  0.001  0.002 -0.001  0.000                            
tenure_yr_c -0.053 -0.014  0.049 -0.018  0.007 -0.011                     
totl_rvns_z  0.065 -0.004 -0.087 -0.036 -0.032 -0.018 -0.181              
confrncBg10 -0.672  0.005  0.010  0.005  0.001  0.217  0.008 -0.041       
confrncBg12 -0.350 -0.004  0.012  0.005  0.006  0.001  0.007 -0.044  0.475
confrncPc12 -0.593 -0.008  0.004  0.018  0.003  0.242 -0.043  0.013  0.603
conferncSEC -0.484 -0.008  0.005  0.002 -0.001  0.001  0.019 -0.052  0.658
            cnfB12 cnfP12
gm_tm_nm__2              
attndnc_sc_              
game_reslt1              
s_game_c                 
ar_clssfct1              
tenure_yr_c              
totl_rvns_z              
confrncBg10              
confrncBg12              
confrncPc12  0.384       
conferncSEC  0.459  0.531

performance::icc(model_school_r_2)

# Intraclass Correlation Coefficient

    Adjusted ICC: 0.891
  Unadjusted ICC: 0.824

plot(model_school_r_2, type=c("p","smooth"))

There is no strong curvature. The linearity assumption is mostly reasonable.
The variability of the residuals seems to be non-constant (along the horizontal line at 0), suggesting a violation of the equal variance assumption of the error term.

qqnorm(resid(model_school_r_2)); qqline(resid(model_school_r_2))

The QQ plot indicates that the distribution of the residuals appears to have heavier tails than the normal distribution, though it look symmetical.

qqnorm(ranef(model_school_r_2)$school[, "(Intercept)"]);qqline(ranef(model_school_r_2)$school[, "(Intercept)"])

The random intercept does not roughly follow the reference line, especially the head and tail, so the random intercepts do not appear approximately normally distributed.

mf <- model.frame(model_school_r_2)
res <- residuals(model_school_r_2)
par(mfrow=c(2,3))
plot(res ~ mf$game_time_num_c_2,
main = "Residuals vs Game Time (centered)",
xlab = "Game Time (centered)", ylab = "Residuals")
plot(res ~ mf$attendance_school_z,
main = "Residuals vs Attendance (Z-scoring)",
xlab = "Attendance (Z-scoring)", ylab = "Residuals")
plot(res ~ mf$total_revenues_z,
main = "Residuals vs Total Revenues",
xlab = "Total Revenues (Z-scoring)", ylab = "Residuals")
plot(res ~ mf$s_game_c,
main = "Residuals vs Season Game (centered)",
xlab = "Season Game (centered)", ylab = "Residuals")
plot(res ~ mf$tenure_year_c,
main = "Residuals vs Tenure (centered)",
xlab = "Tenure (centered)", ylab = "Residuals")
plot(res ~ mf$area_classification,
main = "Residuals vs Area (0/1)",
xlab = "Area (0/1)", ylab = "Residuals")

plot(res ~ mf$conference,
main = "Residuals vs Conference",
xlab = "Conference", ylab = "Residuals")