── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr 1.1.4 ✔ purrr 1.0.4
✔ forcats 1.0.0 ✔ stringr 1.5.1
✔ ggplot2 3.5.2 ✔ tibble 3.2.1
✔ lubridate 1.9.4 ✔ tidyr 1.3.1
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag() masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(dplyr)library(lme4)
Loading required package: Matrix
Attaching package: 'Matrix'
The following objects are masked from 'package:tidyr':
expand, pack, unpack
Import the Data
Clean the data
data_clean <- data %>%select(`Conference`, `School`, `Area Classification (0-Rural; 1-Urban)`, `Year`, `Tenure Year`, `In-Season_Game`, `S_Diversion`, `Attendance`, `Game Time`,`Game result (Win=1; Loss=0)`,`Athletic Dept Profit`, `Athletic Dept Total Expenses`, `Athletic Dept Total Revenues`) # select the columndata_clean$GameTime_numeric <-as.numeric(format(data_clean$`Game Time`, "%H")) +as.numeric(format(data_clean$`Game Time`, "%M"))/60# convert game time to numerical variable and create a new columndata_clean$`Game Time`=format(data_clean$`Game Time`, format ="%H:%M") # avoid game time impacted by computer system datedata_clean <- data_clean %>%mutate(`Game Time`=as.character(`Game Time`)) %>%mutate(`Area Classification (0-Rural; 1-Urban)`=as.character(`Area Classification (0-Rural; 1-Urban)`)) %>%mutate(`Attendance`=as.numeric(`Attendance`)) # Convert the variable to its suitable data type.
Warning: There was 1 warning in `mutate()`.
ℹ In argument: `Attendance = as.numeric(Attendance)`.
Caused by warning:
! NAs introduced by coercion
data_clean <- data_clean %>%mutate(`Game result (Win=1; Loss=0)`=na_if(`Game result (Win=1; Loss=0)`, "N/A")) %>%mutate(across(all_of(cols_to_factor), as.factor)) # Make sure there is no NA level.
data_clean <-subset(data_clean, !is.na(`Game result (Win=1; Loss=0)`)) # Clean the game that is being cancelled.
data_clean$GameTime_numeric_c_1 <-with(data_clean, ifelse( GameTime_numeric >=9& GameTime_numeric <12, 1, # Morningifelse(GameTime_numeric >=12& GameTime_numeric <15.5, 2, # noonifelse(GameTime_numeric >=15.5& GameTime_numeric <19, 3, # afternoonifelse(GameTime_numeric >=19, 4, # eveningNA))) )) # Classify game time into four time slots.
data_clean$GameTime_numeric_c_2 <- data_clean$GameTime_numeric-12# centralize game time by 12
data_clean$`In-Season_Game_Centered`<-with(data_clean,ifelse(`In-Season_Game`==1, 0,ifelse(`In-Season_Game`==2, 1,ifelse(`In-Season_Game`==3, 2,ifelse(`In-Season_Game`==4, 3,ifelse(`In-Season_Game`==5, 4,ifelse(`In-Season_Game`==6, 5,ifelse(`In-Season_Game`==7, 6,ifelse(`In-Season_Game`==8, 7,ifelse(`In-Season_Game`==9, 8, NA)))))))))) # Centralize in the in-season game to 0.
data_clean <- data_clean %>% dplyr::rename(`conference`=`Conference`,`school`=`School`,`area_classification`=`Area Classification (0-Rural; 1-Urban)`,`year`=`Year`,`tenure_year`=`Tenure Year`,`s_game`=`In-Season_Game`,`s_diversion`=`S_Diversion`,`attendance`=`Attendance`,`game_time`=`Game Time`,`game_result`=`Game result (Win=1; Loss=0)`,`profit`=`Athletic Dept Profit`,`total_expenses`=`Athletic Dept Total Expenses`,`total_revenues`=`Athletic Dept Total Revenues`,`game_time_chars_c_1`=`GameTime_numeric_c_1`,`game_time_num_c_2`=`GameTime_numeric_c_2`,`s_game_c`=`In-Season_Game_Centered`,`tenure_year_c`=`Tenure Year Centered`,`year_c`=`Year_Centered` ) %>%select(-`GameTime_numeric`) # rename the columns.data_clean$game_time_chars_c_1 <-as.factor(data_clean$game_time_chars_c_1) # Change the variable into a factor variable.
data_clean %>%ggplot(mapping =aes(x = game_time_chars_c_1, y = s_diversion, colour =factor(conference))) +geom_point() +geom_smooth(mapping =aes(group = conference), method ="loess", se =FALSE, fullrange =TRUE) +labs(title ="s_diversion vs. game time (level by conference)",colour ="Conference") +theme_classic()+theme(plot.title =element_text(hjust =0.5, face ="bold"))
`geom_smooth()` using formula = 'y ~ x'
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: pseudoinverse used at 0.985
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: reciprocal condition number 7.9026e-16
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: There are other near singularities as well. 1
data_clean %>%ggplot(mapping =aes(x = game_time_num_c_2, y = s_diversion, colour =factor(conference))) +geom_point() +geom_smooth(mapping =aes(group = conference), method ="loess", se =FALSE, fullrange =TRUE) +labs(title ="s_diversion vs. game time (level by conference)",colour ="Conference") +theme_classic()+theme(plot.title =element_text(hjust =0.5, face ="bold"))
`geom_smooth()` using formula = 'y ~ x'
Warning: Removed 73 rows containing missing values or values outside the scale range
(`geom_smooth()`).
Linear mixed model fit by REML ['lmerMod']
Formula: s_diversion ~ 1 + (1 | year)
Data: data_clean
REML criterion at convergence: 424.5
Scaled residuals:
Min 1Q Median 3Q Max
-2.1594 -0.7537 -0.2390 0.8449 2.2797
Random effects:
Groups Name Variance Std.Dev.
year (Intercept) 0.01307 0.1143
Residual 0.07647 0.2765
Number of obs: 1390, groups: year, 22
Fixed effects:
Estimate Std. Error t value
(Intercept) 0.37894 0.02623 14.45
Random effects year (Intercept): 0.01307 There’s some variability in average s_diversion across years.
Fixed effects (Intercept): 0.37894 The overall average s_diversion is 0.37894 across all years.
Linear mixed model fit by REML ['lmerMod']
Formula: s_diversion ~ 1 + (1 | school)
Data: data_clean
REML criterion at convergence: -1126.1
Scaled residuals:
Min 1Q Median 3Q Max
-3.4297 -0.4986 -0.0120 0.5227 3.1356
Random effects:
Groups Name Variance Std.Dev.
school (Intercept) 0.06163 0.2482
Residual 0.02346 0.1532
Number of obs: 1390, groups: school, 31
Fixed effects:
Estimate Std. Error t value
(Intercept) 0.37253 0.04489 8.299
Random effects year (Intercept): 0.06163 There is variability in the average s_diversion across schools.
Fixed effects (Intercept): 0.37253 The overall average s_diversion is 0.37253, across all schools.
library("sjPlot")plot_model(school_model_1_null, type ='re', facet.grid =FALSE, sort.est ="sort.all", y.offset = .4)
Sorting each group of random effects ('sort.all') is not possible when 'facets = TRUE'.
Linear mixed model fit by REML ['lmerMod']
Formula: s_diversion ~ 1 + (1 | school) + (1 | school:year)
Data: data_clean
REML criterion at convergence: -2448.2
Scaled residuals:
Min 1Q Median 3Q Max
-5.6732 -0.4055 -0.0140 0.3816 5.6324
Random effects:
Groups Name Variance Std.Dev.
school:year (Intercept) 0.019235 0.13869
school (Intercept) 0.058885 0.24266
Residual 0.005853 0.07651
Number of obs: 1390, groups: school:year, 214; school, 31
Fixed effects:
Estimate Std. Error t value
(Intercept) 0.37444 0.04518 8.288
Random effects year (Intercept): school:year 0.019235 school 0.058885 There is variance between schools, and the schools change over the years.
Fixed effects:0.37444 The overall average s_diversion is 0.37444, across all schools and years.
plot_model(two_level_model_null, type ='re', facet.grid =FALSE, sort.est ="sort.all", y.offset = .4)
Sorting each group of random effects ('sort.all') is not possible when 'facets = TRUE'.
Sorting each group of random effects ('sort.all') is not possible when 'facets = TRUE'.
Var1 Freq
1 Arizona State 33
2 Arkansas 6
3 Auburn 36
4 Clemson 28
5 Colorado University 59
6 Duke 38
7 Florida 43
8 Georgia 36
9 Georgia Tech 33
10 Illinois 28
11 Iowa 16
12 Kentucky 34
13 LSU 36
14 Maryland 29
15 Michigan 143
16 Michigan State 21
17 Minnesota 42
18 NC State 31
19 Ohio State 123
20 Oklahoma 23
21 Penn State 82
22 Purdue 55
23 Stanford 32
24 Tennessee 77
25 UCLA 7
26 UNC 100
27 UoTexas 54
28 USC 16
29 Washington 32
30 Washington State 42
31 Wisconsin 55
ggplot(frequency_table_school, aes(x = Var1, y = Freq)) +geom_bar(stat ="identity", fill ="black") +labs(title ="Barplot for School Categories", x ="Categories", y ="Frequency") +theme_classic()+theme(axis.text.x =element_text(angle =90, hjust =1, size =8))