Stadium_Waste-Descriptive_Analysis_3

Author

Jingyi Yang

Load Packages

library(skimr)
library(summarytools)
library(readxl)
library("readr")
library("tidyverse")
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ purrr     1.0.4
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.5.2     ✔ tibble    3.2.1
✔ lubridate 1.9.4     ✔ tidyr     1.3.1
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
✖ tibble::view()  masks summarytools::view()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(dplyr)

Import the Data

Warning: Expecting numeric in AK146 / R146C37: got a date
Warning: Expecting numeric in AL146 / R146C38: got a date
Warning: Expecting numeric in AK147 / R147C37: got a date
Warning: Expecting numeric in AL147 / R147C38: got a date
Warning: Expecting numeric in AK148 / R148C37: got a date
Warning: Expecting numeric in AL148 / R148C38: got a date
Warning: Expecting numeric in AK341 / R341C37: got a date
Warning: Expecting numeric in AL341 / R341C38: got a date
Warning: Expecting numeric in AK342 / R342C37: got a date
Warning: Expecting numeric in AL342 / R342C38: got a date
Warning: Expecting numeric in AK343 / R343C37: got a date
Warning: Expecting numeric in AL343 / R343C38: got a date
Warning: Expecting numeric in AK373 / R373C37: got a date
Warning: Expecting numeric in AL373 / R373C38: got a date
Warning: Expecting numeric in AK374 / R374C37: got a date
Warning: Expecting numeric in AL374 / R374C38: got a date
Warning: Expecting numeric in AK375 / R375C37: got a date
Warning: Expecting numeric in AL375 / R375C38: got a date
Warning: Expecting numeric in AK376 / R376C37: got a date
Warning: Expecting numeric in AL376 / R376C38: got a date
Warning: Expecting numeric in AK377 / R377C37: got a date
Warning: Expecting numeric in AL377 / R377C38: got a date
Warning: Expecting numeric in AK865 / R865C37: got a date
Warning: Expecting numeric in AL865 / R865C38: got a date
Warning: Expecting numeric in AK866 / R866C37: got a date
Warning: Expecting numeric in AL866 / R866C38: got a date
Warning: Expecting numeric in AK867 / R867C37: got a date
Warning: Expecting numeric in AL867 / R867C38: got a date
Warning: Expecting numeric in AK868 / R868C37: got a date
Warning: Expecting numeric in AL868 / R868C38: got a date
# Pull the columns of interest out of the legacy table.
Collected_Data_old_select <- select(
  Collected_Data_old,
  Conference, School, Year, `Tenure Year`,
  S_Game, S_Diversion, Attendance, Date
)

# Switch the column names to snake_case; S_Game and Date keep their names.
Collected_Data_old_renamed <- Collected_Data_old_select %>%
  rename(
    conference = Conference,
    school = School,
    year = Year,
    tenure_year = `Tenure Year`,
    s_diversion = S_Diversion,
    attendance = Attendance
  ) %>%
  # Non-numeric attendance entries become NA (as.numeric() warns about it).
  mutate(attendance = as.numeric(attendance)) %>%
  # Turn the "#DIV/0!" error marker into NA before converting to numeric.
  mutate(s_diversion = na_if(s_diversion, "#DIV/0!")) %>%
  mutate(s_diversion = as.numeric(s_diversion))
Warning: There was 1 warning in `mutate()`.
ℹ In argument: `attendance = as.numeric(attendance)`.
Caused by warning:
! NAs introduced by coercion
Warning: There was 1 warning in `mutate()`.
ℹ In argument: `s_diversion = as.numeric(s_diversion)`.
Caused by warning:
! NAs introduced by coercion
# Check column names and types of the renamed legacy table.
Collected_Data_old_renamed %>% str()
tibble [1,396 × 8] (S3: tbl_df/tbl/data.frame)
 $ conference : chr [1:1396] "Big10" "Big10" "Big10" "Big10" ...
 $ school     : chr [1:1396] "Iowa" "Iowa" "Iowa" "Iowa" ...
 $ year       : num [1:1396] 2018 2018 2018 2018 2015 ...
 $ tenure_year: num [1:1396] 3 3 3 3 4 5 4 1 8 7 ...
 $ S_Game     : num [1:1396] 4 5 6 7 6 3 5 3 6 4 ...
 $ s_diversion: num [1:1396] NA NA NA NA NA ...
 $ attendance : num [1:1396] 69250 69250 66493 65299 85821 ...
 $ Date       : chr [1:1396] "9/22/2018" "10/20/2018" "11/10/2018" "11/23/2018" ...
# Build the cleaned analysis table: keep the analysis columns, then convert
# the text-encoded measures to their working types.
Collected_Data_clean <- Collected_Data %>%
  select(
    conference, confCode, school, school_ID,
    year, year_0, tenure_year, tenure_0,
    game_number, game_number2, game_0,
    s_diversion, attendance,
    game_time, game_min, game_time_hr_0, time_0,
    game_result, Date
  ) %>%
  mutate(
    # The "#DIV/0!" error marker becomes NA before the numeric conversion.
    s_diversion = as.numeric(na_if(s_diversion, "#DIV/0!")),
    # Entries that cannot be parsed as numbers become NA (R warns about it).
    attendance = as.numeric(attendance),
    game_time = as.character(game_time)
  )
Warning: There was 1 warning in `mutate()`.
ℹ In argument: `attendance = as.numeric(attendance)`.
Caused by warning:
! NAs introduced by coercion
# Preview the first rows of the cleaned table.
Collected_Data_clean %>% head()
# A tibble: 6 × 19
  conference confCode school     school_ID  year year_0 tenure_year tenure_0
  <chr>         <dbl> <chr>          <dbl> <dbl>  <dbl>       <dbl>    <dbl>
1 Big10             1 Iowa              10  2018     15           3        2
2 Big10             1 Iowa              10  2018     15           3        2
3 Big10             1 Iowa              10  2018     15           3        2
4 Big10             1 Iowa              10  2018     15           3        2
5 Big12             2 Oklahoma          19  2015     12           4        3
6 Big10             1 Ohio State        18  2014     11           8        7
# ℹ 11 more variables: game_number <dbl>, game_number2 <dbl>, game_0 <dbl>,
#   s_diversion <dbl>, attendance <dbl>, game_time <chr>, game_min <dbl>,
#   game_time_hr_0 <dbl>, time_0 <chr>, game_result <chr>, Date <chr>
# Check column names and types of the cleaned table.
Collected_Data_clean %>% str()
tibble [1,240 × 19] (S3: tbl_df/tbl/data.frame)
 $ conference    : chr [1:1240] "Big10" "Big10" "Big10" "Big10" ...
 $ confCode      : num [1:1240] 1 1 1 1 2 1 1 1 1 1 ...
 $ school        : chr [1:1240] "Iowa" "Iowa" "Iowa" "Iowa" ...
 $ school_ID     : num [1:1240] 10 10 10 10 19 18 18 18 18 18 ...
 $ year          : num [1:1240] 2018 2018 2018 2018 2015 ...
 $ year_0        : num [1:1240] 15 15 15 15 12 11 10 9 10 9 ...
 $ tenure_year   : num [1:1240] 3 3 3 3 4 8 7 6 7 6 ...
 $ tenure_0      : num [1:1240] 2 2 2 2 3 7 6 5 6 5 ...
 $ game_number   : num [1:1240] 17 18 19 20 24 55 47 42 49 43 ...
 $ game_number2  : num [1:1240] 4 5 6 7 6 6 4 7 7 8 ...
 $ game_0        : num [1:1240] 3 4 5 6 5 5 3 6 6 7 ...
 $ s_diversion   : num [1:1240] NA NA NA NA NA ...
 $ attendance    : num [1:1240] 69250 69250 66493 65299 85821 ...
 $ game_time     : chr [1:1240] "1899-12-31 19:35:00" "1899-12-31 11:00:00" "1899-12-31 14:30:00" "1899-12-31 11:00:00" ...
 $ game_min      : num [1:1240] 1175 660 870 660 1154 ...
 $ game_time_hr_0: num [1:1240] 7.58 -1 2.5 -1 7.23 ...
 $ time_0        : chr [1:1240] "2" "0" "1" "0" ...
 $ game_result   : chr [1:1240] "0.0" "1.0" "0.0" "1.0" ...
 $ Date          : chr [1:1240] "9/22/2018" "10/20/2018" "11/10/2018" "11/23/2018" ...

Correct “confCode” and “school_ID” columns

# Profile the conference labels (missing values, number of distinct labels).
skim(Collected_Data_clean$conference)
Data summary
Name Collected_Data_clean$conf…
Number of rows 1240
Number of columns 1
_______________________
Column type frequency:
character 1
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
data 0 1 3 5 0 5 0
# Profile the numeric conference code to check for missing values.
skim(Collected_Data_clean$confCode)
Data summary
Name Collected_Data_clean$conf…
Number of rows 1240
Number of columns 1
_______________________
Column type frequency:
numeric 1
________________________
Group variables None

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
data 15 0.99 1.88 1.44 0 1 1 3 4 ▃▇▁▃▅
# Profile the school names (missing values, number of distinct schools).
# NOTE(review): this reads the raw Collected_Data, whereas the neighbouring
# checks use Collected_Data_clean — confirm that is intended.
skim(Collected_Data$school)
Data summary
Name Collected_Data$school
Number of rows 1240
Number of columns 1
_______________________
Column type frequency:
character 1
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
data 0 1 3 19 0 30 0
# Profile the numeric school ID to check for missing values.
skim(Collected_Data_clean$school_ID)
Data summary
Name Collected_Data_clean$scho…
Number of rows 1240
Number of columns 1
_______________________
Column type frequency:
numeric 1
________________________
Group variables None

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
data 15 0.99 16.37 8.14 1 9 18 23 30 ▅▅▇▇▆
# Show the rows that have no school_ID.
Collected_Data_clean %>% filter(is.na(school_ID))
# A tibble: 15 × 19
   conference confCode school     school_ID  year year_0 tenure_year tenure_0
   <chr>         <dbl> <chr>          <dbl> <dbl>  <dbl>       <dbl>    <dbl>
 1 Pac12            NA USC               NA  2020     NA           6       NA
 2 Pac12            NA USC               NA  2020     NA           6       NA
 3 Pac12            NA USC               NA  2020     NA           6       NA
 4 SEC              NA Georgia           NA  2020     NA           6       NA
 5 SEC              NA Georgia           NA  2020     NA           6       NA
 6 SEC              NA Georgia           NA  2020     NA           6       NA
 7 SEC              NA Kentucky          NA  2020     NA           5       NA
 8 SEC              NA Kentucky          NA  2020     NA           5       NA
 9 SEC              NA Kentucky          NA  2020     NA           5       NA
10 SEC              NA Kentucky          NA  2020     NA           5       NA
11 SEC              NA Kentucky          NA  2020     NA           5       NA
12 Big10            NA Penn State        NA  2021     NA          13       NA
13 Big10            NA Penn State        NA  2021     NA          13       NA
14 Big10            NA Penn State        NA  2021     NA          13       NA
15 Big10            NA Penn State        NA  2021     NA          13       NA
# ℹ 11 more variables: game_number <dbl>, game_number2 <dbl>, game_0 <dbl>,
#   s_diversion <dbl>, attendance <dbl>, game_time <chr>, game_min <dbl>,
#   game_time_hr_0 <dbl>, time_0 <chr>, game_result <chr>, Date <chr>
# All USC rows, to read the codes off the records that already have them.
filter(Collected_Data_clean, school == "USC")
# A tibble: 16 × 19
   conference confCode school school_ID  year year_0 tenure_year tenure_0
   <chr>         <dbl> <chr>      <dbl> <dbl>  <dbl>       <dbl>    <dbl>
 1 Pac12             3 USC           27  2019     16           5        4
 2 Pac12             3 USC           27  2019     16           5        4
 3 Pac12             3 USC           27  2019     16           5        4
 4 Pac12             3 USC           27  2019     16           5        4
 5 Pac12             3 USC           27  2019     16           5        4
 6 Pac12             3 USC           27  2019     16           5        4
 7 Pac12            NA USC           NA  2020     NA           6       NA
 8 Pac12            NA USC           NA  2020     NA           6       NA
 9 Pac12            NA USC           NA  2020     NA           6       NA
10 Pac12             3 USC           27  2015     12           1        0
11 Pac12             3 USC           27  2015     12           1        0
12 Pac12             3 USC           27  2015     12           1        0
13 Pac12             3 USC           27  2015     12           1        0
14 Pac12             3 USC           27  2015     12           1        0
15 Pac12             3 USC           27  2015     12           1        0
16 Pac12             3 USC           27  2015     12           1        0
# ℹ 11 more variables: game_number <dbl>, game_number2 <dbl>, game_0 <dbl>,
#   s_diversion <dbl>, attendance <dbl>, game_time <chr>, game_min <dbl>,
#   game_time_hr_0 <dbl>, time_0 <chr>, game_result <chr>, Date <chr>
# All Georgia rows, to read the codes off the records that already have them.
filter(Collected_Data_clean, school == "Georgia")
# A tibble: 36 × 19
   conference confCode school  school_ID  year year_0 tenure_year tenure_0
   <chr>         <dbl> <chr>       <dbl> <dbl>  <dbl>       <dbl>    <dbl>
 1 SEC              NA Georgia        NA  2020     NA           6       NA
 2 SEC              NA Georgia        NA  2020     NA           6       NA
 3 SEC              NA Georgia        NA  2020     NA           6       NA
 4 SEC               4 Georgia         8  2016     13           2        1
 5 SEC               4 Georgia         8  2017     14           3        2
 6 SEC               4 Georgia         8  2017     14           3        2
 7 SEC               4 Georgia         8  2019     16           5        4
 8 SEC               4 Georgia         8  2017     14           3        2
 9 SEC               4 Georgia         8  2018     15           4        3
10 SEC               4 Georgia         8  2019     16           5        4
# ℹ 26 more rows
# ℹ 11 more variables: game_number <dbl>, game_number2 <dbl>, game_0 <dbl>,
#   s_diversion <dbl>, attendance <dbl>, game_time <chr>, game_min <dbl>,
#   game_time_hr_0 <dbl>, time_0 <chr>, game_result <chr>, Date <chr>
# All Kentucky rows, to read the codes off the records that already have them.
filter(Collected_Data_clean, school == "Kentucky")
# A tibble: 34 × 19
   conference confCode school   school_ID  year year_0 tenure_year tenure_0
   <chr>         <dbl> <chr>        <dbl> <dbl>  <dbl>       <dbl>    <dbl>
 1 SEC              NA Kentucky        NA  2020     NA           5       NA
 2 SEC              NA Kentucky        NA  2020     NA           5       NA
 3 SEC              NA Kentucky        NA  2020     NA           5       NA
 4 SEC              NA Kentucky        NA  2020     NA           5       NA
 5 SEC              NA Kentucky        NA  2020     NA           5       NA
 6 SEC               4 Kentucky        11  2019     16           4        3
 7 SEC               4 Kentucky        11  2019     16           4        3
 8 SEC               4 Kentucky        11  2019     16           4        3
 9 SEC               4 Kentucky        11  2019     16           4        3
10 SEC               4 Kentucky        11  2019     16           4        3
# ℹ 24 more rows
# ℹ 11 more variables: game_number <dbl>, game_number2 <dbl>, game_0 <dbl>,
#   s_diversion <dbl>, attendance <dbl>, game_time <chr>, game_min <dbl>,
#   game_time_hr_0 <dbl>, time_0 <chr>, game_result <chr>, Date <chr>
# All Penn State rows, to read the codes off the records that have them.
filter(Collected_Data_clean, school == "Penn State")
# A tibble: 82 × 19
   conference confCode school     school_ID  year year_0 tenure_year tenure_0
   <chr>         <dbl> <chr>          <dbl> <dbl>  <dbl>       <dbl>    <dbl>
 1 Big10             1 Penn State        20  2013     10           5        4
 2 Big10             1 Penn State        20  2014     11           6        5
 3 Big10             1 Penn State        20  2013     10           5        4
 4 Big10             1 Penn State        20  2013     10           5        4
 5 Big10             1 Penn State        20  2012      9           4        3
 6 Big10             1 Penn State        20  2011      8           3        2
 7 Big10             1 Penn State        20  2015     12           7        6
 8 Big10             1 Penn State        20  2012      9           4        3
 9 Big10             1 Penn State        20  2016     13           8        7
10 Big10             1 Penn State        20  2011      8           3        2
# ℹ 72 more rows
# ℹ 11 more variables: game_number <dbl>, game_number2 <dbl>, game_0 <dbl>,
#   s_diversion <dbl>, attendance <dbl>, game_time <chr>, game_min <dbl>,
#   game_time_hr_0 <dbl>, time_0 <chr>, game_result <chr>, Date <chr>
# List the ACC rows to check whether the conference is coded consistently.
filter(Collected_Data_clean, conference == "ACC")
# A tibble: 230 × 19
   conference confCode school       school_ID  year year_0 tenure_year tenure_0
   <chr>         <dbl> <chr>            <dbl> <dbl>  <dbl>       <dbl>    <dbl>
 1 ACC               0 Duke                 6  2015     12           2        1
 2 ACC               0 Georgia Tech         9  2016     13           2        1
 3 ACC               0 Duke                 6  2016     13           3        2
 4 ACC               0 Georgia Tech         9  2017     14           3        2
 5 ACC               2 Clemson              4  2017     14           2        1
 6 ACC               0 Duke                 6  2019     16           6        5
 7 ACC               2 Clemson              4  2018     15           3        2
 8 ACC               2 Clemson              4  2018     15           3        2
 9 ACC               0 Duke                 6  2018     15           5        4
10 ACC               0 Georgia Tech         9  2019     16           5        4
# ℹ 220 more rows
# ℹ 11 more variables: game_number <dbl>, game_number2 <dbl>, game_0 <dbl>,
#   s_diversion <dbl>, attendance <dbl>, game_time <chr>, game_min <dbl>,
#   game_time_hr_0 <dbl>, time_0 <chr>, game_result <chr>, Date <chr>
# NOTE(review): this repeats the preceding ACC query unchanged — possibly a
# leftover; confirm whether a different check was intended here.
filter(Collected_Data_clean, conference == "ACC")
# A tibble: 230 × 19
   conference confCode school       school_ID  year year_0 tenure_year tenure_0
   <chr>         <dbl> <chr>            <dbl> <dbl>  <dbl>       <dbl>    <dbl>
 1 ACC               0 Duke                 6  2015     12           2        1
 2 ACC               0 Georgia Tech         9  2016     13           2        1
 3 ACC               0 Duke                 6  2016     13           3        2
 4 ACC               0 Georgia Tech         9  2017     14           3        2
 5 ACC               2 Clemson              4  2017     14           2        1
 6 ACC               0 Duke                 6  2019     16           6        5
 7 ACC               2 Clemson              4  2018     15           3        2
 8 ACC               2 Clemson              4  2018     15           3        2
 9 ACC               0 Duke                 6  2018     15           5        4
10 ACC               0 Georgia Tech         9  2019     16           5        4
# ℹ 220 more rows
# ℹ 11 more variables: game_number <dbl>, game_number2 <dbl>, game_0 <dbl>,
#   s_diversion <dbl>, attendance <dbl>, game_time <chr>, game_min <dbl>,
#   game_time_hr_0 <dbl>, time_0 <chr>, game_result <chr>, Date <chr>
# Overwrite confCode and school_ID with fixed values for the schools whose
# rows were missing them. Every row of a listed school is overwritten, not
# only the NA rows, and every ACC row is forced to conference code 0.
Collected_Data_clean <- Collected_Data_clean %>%
  mutate(
    confCode = case_when(
      # ACC comes first so that it takes precedence over the school rules.
      conference == "ACC" ~ 0,
      school == "USC" ~ 3,
      school %in% c("Georgia", "Kentucky") ~ 4,
      school == "Penn State" ~ 1,
      .default = confCode
    ),
    school_ID = case_when(
      school == "USC" ~ 27,
      school == "Georgia" ~ 8,
      school == "Kentucky" ~ 11,
      school == "Penn State" ~ 20,
      .default = school_ID
    )
  )
# Re-profile confCode after the manual corrections; n_missing should be 0.
skim(Collected_Data_clean$confCode)
Data summary
Name Collected_Data_clean$conf…
Number of rows 1240
Number of columns 1
_______________________
Column type frequency:
numeric 1
________________________
Group variables None

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
data 0 1 1.85 1.47 0 1 1 3 4 ▃▇▁▃▅
# Re-profile school_ID after the manual corrections; n_missing should be 0.
skim(Collected_Data_clean$school_ID)
Data summary
Name Collected_Data_clean$scho…
Number of rows 1240
Number of columns 1
_______________________
Column type frequency:
numeric 1
________________________
Group variables None

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
data 0 1 16.36 8.13 1 9 18 23 30 ▅▅▇▇▆
# Confirm that every ACC row now carries the same confCode.
filter(Collected_Data_clean, conference == "ACC")
# A tibble: 230 × 19
   conference confCode school       school_ID  year year_0 tenure_year tenure_0
   <chr>         <dbl> <chr>            <dbl> <dbl>  <dbl>       <dbl>    <dbl>
 1 ACC               0 Duke                 6  2015     12           2        1
 2 ACC               0 Georgia Tech         9  2016     13           2        1
 3 ACC               0 Duke                 6  2016     13           3        2
 4 ACC               0 Georgia Tech         9  2017     14           3        2
 5 ACC               0 Clemson              4  2017     14           2        1
 6 ACC               0 Duke                 6  2019     16           6        5
 7 ACC               0 Clemson              4  2018     15           3        2
 8 ACC               0 Clemson              4  2018     15           3        2
 9 ACC               0 Duke                 6  2018     15           5        4
10 ACC               0 Georgia Tech         9  2019     16           5        4
# ℹ 220 more rows
# ℹ 11 more variables: game_number <dbl>, game_number2 <dbl>, game_0 <dbl>,
#   s_diversion <dbl>, attendance <dbl>, game_time <chr>, game_min <dbl>,
#   game_time_hr_0 <dbl>, time_0 <chr>, game_result <chr>, Date <chr>
# Count rows per conference label.
table(Collected_Data_clean$conference)

  ACC Big10 Big12 Pac12   SEC 
  230   487    33   221   269 
# Count rows per conference code, for comparison with the label counts.
table(Collected_Data_clean$confCode)

  0   1   2   3   4 
230 487  33 221 269 
# Count rows per school name.
table(Collected_Data_clean$school)

      Arizona State            Arkansas              Auburn             Clemson 
                 33                   6                  36                  28 
Colorado University                Duke             Florida             Georgia 
                 59                  38                  43                  36 
       Georgia Tech                Iowa            Kentucky                 LSU 
                 33                  20                  34                  37 
           Maryland            Michigan      Michigan State           Minnesota 
                 29                 113                  21                  21 
           NC State          Ohio State            Oklahoma          Penn State 
                 31                  91                  24                  82 
             Purdue            Stanford           Tennessee                UCLA 
                 55                  32                  77                   7 
                UNC             UoTexas                 USC          Washington 
                100                   9                  16                  32 
   Washington State           Wisconsin 
                 42                  55 
# Count rows per school ID, for comparison with the school-name counts.
table(Collected_Data_clean$school_ID)

  1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17  18  19  20 
 33   6  36  28  59  38  43  36  33  20  34  37  29 113  21  21  31  91  24  82 
 21  22  23  24  25  26  27  28  29  30 
 55  32  77   7 100   9  16  32  42  55 

Other missing values

# Missing-value check for S_Game, taken from the legacy table.
skim(Collected_Data_old_renamed$S_Game)
Data summary
Name Collected_Data_old_rename…
Number of rows 1396
Number of columns 1
_______________________
Column type frequency:
numeric 1
________________________
Group variables None

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
data 0 1 3.87 1.98 1 2 4 6 9 ▇▇▃▆▁
# Missing-value check and distribution summary for s_diversion.
skim(Collected_Data_clean$s_diversion)
Data summary
Name Collected_Data_clean$s_di…
Number of rows 1240
Number of columns 1
_______________________
Column type frequency:
numeric 1
________________________
Group variables None

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
data 5 1 0.37 0.28 0 0.15 0.28 0.57 0.99 ▇▇▃▂▃
# Missing-value check and distribution summary for attendance.
skim(Collected_Data_clean$attendance)
Data summary
Name Collected_Data_clean$atte…
Number of rows 1240
Number of columns 1
_______________________
Column type frequency:
numeric 1
________________________
Group variables None

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
data 2 1 72662 27342.12 12000 48566.25 75713.5 99052.5 115109 ▂▇▅▆▇
# Missing-value check for Date (stored as character strings).
skim(Collected_Data_clean$Date)
Data summary
Name Collected_Data_clean$Date
Number of rows 1240
Number of columns 1
_______________________
Column type frequency:
character 1
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
data 378 0.7 6 14 0 379 0
# Missing-value check and distribution summary for year_0.
skim(Collected_Data_clean$year_0)
Data summary
Name Collected_Data_clean$year…
Number of rows 1240
Number of columns 1
_______________________
Column type frequency:
numeric 1
________________________
Group variables None

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
data 15 0.99 11.25 4.06 0 9 12 14 16 ▁▂▂▃▇
# Missing-value check and distribution summary for tenure_0.
skim(Collected_Data_clean$tenure_0)
Data summary
Name Collected_Data_clean$tenu…
Number of rows 1240
Number of columns 1
_______________________
Column type frequency:
numeric 1
________________________
Group variables None

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
data 15 0.99 4.35 3.99 0 1 3 7 16 ▇▃▂▁▁
# Missing-value check and distribution summary for game_0.
skim(Collected_Data_clean$game_0)
Data summary
Name Collected_Data_clean$game…
Number of rows 1240
Number of columns 1
_______________________
Column type frequency:
numeric 1
________________________
Group variables None

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
data 15 0.99 2.87 1.96 0 1 3 5 7 ▇▅▇▃▃
# Missing-value check for time_0 (stored as character strings).
skim(Collected_Data_clean$time_0)
Data summary
Name Collected_Data_clean$time…
Number of rows 1240
Number of columns 1
_______________________
Column type frequency:
character 1
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
data 15 0.99 1 1 0 3 0
# Missing-value check for game_result (stored as character strings).
skim(Collected_Data_clean$game_result)
Data summary
Name Collected_Data_clean$game…
Number of rows 1240
Number of columns 1
_______________________
Column type frequency:
character 1
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
data 5 1 3 3 0 3 0
# Show the rows that have no year_0.
Collected_Data_clean %>% filter(is.na(year_0))
# A tibble: 15 × 19
   conference confCode school     school_ID  year year_0 tenure_year tenure_0
   <chr>         <dbl> <chr>          <dbl> <dbl>  <dbl>       <dbl>    <dbl>
 1 Pac12             3 USC               27  2020     NA           6       NA
 2 Pac12             3 USC               27  2020     NA           6       NA
 3 Pac12             3 USC               27  2020     NA           6       NA
 4 SEC               4 Georgia            8  2020     NA           6       NA
 5 SEC               4 Georgia            8  2020     NA           6       NA
 6 SEC               4 Georgia            8  2020     NA           6       NA
 7 SEC               4 Kentucky          11  2020     NA           5       NA
 8 SEC               4 Kentucky          11  2020     NA           5       NA
 9 SEC               4 Kentucky          11  2020     NA           5       NA
10 SEC               4 Kentucky          11  2020     NA           5       NA
11 SEC               4 Kentucky          11  2020     NA           5       NA
12 Big10             1 Penn State        20  2021     NA          13       NA
13 Big10             1 Penn State        20  2021     NA          13       NA
14 Big10             1 Penn State        20  2021     NA          13       NA
15 Big10             1 Penn State        20  2021     NA          13       NA
# ℹ 11 more variables: game_number <dbl>, game_number2 <dbl>, game_0 <dbl>,
#   s_diversion <dbl>, attendance <dbl>, game_time <chr>, game_min <dbl>,
#   game_time_hr_0 <dbl>, time_0 <chr>, game_result <chr>, Date <chr>
# Show the rows that have no tenure_0.
Collected_Data_clean %>% filter(is.na(tenure_0))
# A tibble: 15 × 19
   conference confCode school     school_ID  year year_0 tenure_year tenure_0
   <chr>         <dbl> <chr>          <dbl> <dbl>  <dbl>       <dbl>    <dbl>
 1 Pac12             3 USC               27  2020     NA           6       NA
 2 Pac12             3 USC               27  2020     NA           6       NA
 3 Pac12             3 USC               27  2020     NA           6       NA
 4 SEC               4 Georgia            8  2020     NA           6       NA
 5 SEC               4 Georgia            8  2020     NA           6       NA
 6 SEC               4 Georgia            8  2020     NA           6       NA
 7 SEC               4 Kentucky          11  2020     NA           5       NA
 8 SEC               4 Kentucky          11  2020     NA           5       NA
 9 SEC               4 Kentucky          11  2020     NA           5       NA
10 SEC               4 Kentucky          11  2020     NA           5       NA
11 SEC               4 Kentucky          11  2020     NA           5       NA
12 Big10             1 Penn State        20  2021     NA          13       NA
13 Big10             1 Penn State        20  2021     NA          13       NA
14 Big10             1 Penn State        20  2021     NA          13       NA
15 Big10             1 Penn State        20  2021     NA          13       NA
# ℹ 11 more variables: game_number <dbl>, game_number2 <dbl>, game_0 <dbl>,
#   s_diversion <dbl>, attendance <dbl>, game_time <chr>, game_min <dbl>,
#   game_time_hr_0 <dbl>, time_0 <chr>, game_result <chr>, Date <chr>
# List the games played in 2020. year is a numeric column, so it is compared
# with a number rather than a string: a string comparison only works through
# implicit number-to-character coercion.
Collected_Data_clean %>% filter(year == 2020)
# A tibble: 11 × 19
   conference confCode school   school_ID  year year_0 tenure_year tenure_0
   <chr>         <dbl> <chr>        <dbl> <dbl>  <dbl>       <dbl>    <dbl>
 1 Pac12             3 USC             27  2020     NA           6       NA
 2 Pac12             3 USC             27  2020     NA           6       NA
 3 Pac12             3 USC             27  2020     NA           6       NA
 4 SEC               4 Georgia          8  2020     NA           6       NA
 5 SEC               4 Georgia          8  2020     NA           6       NA
 6 SEC               4 Georgia          8  2020     NA           6       NA
 7 SEC               4 Kentucky        11  2020     NA           5       NA
 8 SEC               4 Kentucky        11  2020     NA           5       NA
 9 SEC               4 Kentucky        11  2020     NA           5       NA
10 SEC               4 Kentucky        11  2020     NA           5       NA
11 SEC               4 Kentucky        11  2020     NA           5       NA
# ℹ 11 more variables: game_number <dbl>, game_number2 <dbl>, game_0 <dbl>,
#   s_diversion <dbl>, attendance <dbl>, game_time <chr>, game_min <dbl>,
#   game_time_hr_0 <dbl>, time_0 <chr>, game_result <chr>, Date <chr>
# List the games played in 2021. year is a numeric column, so it is compared
# with a number rather than a string: a string comparison only works through
# implicit number-to-character coercion.
Collected_Data_clean %>% filter(year == 2021)
# A tibble: 4 × 19
  conference confCode school     school_ID  year year_0 tenure_year tenure_0
  <chr>         <dbl> <chr>          <dbl> <dbl>  <dbl>       <dbl>    <dbl>
1 Big10             1 Penn State        20  2021     NA          13       NA
2 Big10             1 Penn State        20  2021     NA          13       NA
3 Big10             1 Penn State        20  2021     NA          13       NA
4 Big10             1 Penn State        20  2021     NA          13       NA
# ℹ 11 more variables: game_number <dbl>, game_number2 <dbl>, game_0 <dbl>,
#   s_diversion <dbl>, attendance <dbl>, game_time <chr>, game_min <dbl>,
#   game_time_hr_0 <dbl>, time_0 <chr>, game_result <chr>, Date <chr>

Create final dataset

# Attach the legacy-only column (S_Game) to the cleaned table. A right join
# keeps every row of Collected_Data_clean; the columns shared by both tables
# serve as the match key.
# NOTE(review): the key includes the numeric columns s_diversion and
# attendance, so matches depend on exact equality of those values — confirm
# both tables come from the same source.
data <- Collected_Data_old_renamed %>%
  right_join(
    Collected_Data_clean,
    by = c(
      "conference", "school", "year", "tenure_year",
      "s_diversion", "attendance", "Date"
    )
  )

Descriptive Analysis

# Check column names and types of the joined table.
data %>% str()
tibble [1,240 × 20] (S3: tbl_df/tbl/data.frame)
 $ conference    : chr [1:1240] "Big10" "Big10" "Big10" "Big10" ...
 $ school        : chr [1:1240] "Iowa" "Iowa" "Iowa" "Iowa" ...
 $ year          : num [1:1240] 2018 2018 2018 2018 2015 ...
 $ tenure_year   : num [1:1240] 3 3 3 3 4 5 4 1 8 7 ...
 $ S_Game        : num [1:1240] 4 5 6 7 6 3 5 3 6 4 ...
 $ s_diversion   : num [1:1240] NA NA NA NA NA ...
 $ attendance    : num [1:1240] 69250 69250 66493 65299 85821 ...
 $ Date          : chr [1:1240] "9/22/2018" "10/20/2018" "11/10/2018" "11/23/2018" ...
 $ confCode      : num [1:1240] 1 1 1 1 2 3 3 4 1 1 ...
 $ school_ID     : num [1:1240] 10 10 10 10 19 1 1 7 18 18 ...
 $ year_0        : num [1:1240] 15 15 15 15 12 16 15 10 11 10 ...
 $ tenure_0      : num [1:1240] 2 2 2 2 3 4 3 0 7 6 ...
 $ game_number   : num [1:1240] 17 18 19 20 24 29 25 3 55 47 ...
 $ game_number2  : num [1:1240] 4 5 6 7 6 3 5 3 6 4 ...
 $ game_0        : num [1:1240] 3 4 5 6 5 2 4 2 5 3 ...
 $ game_time     : chr [1:1240] "1899-12-31 19:35:00" "1899-12-31 11:00:00" "1899-12-31 14:30:00" "1899-12-31 11:00:00" ...
 $ game_min      : num [1:1240] 1175 660 870 660 1154 ...
 $ game_time_hr_0: num [1:1240] 7.58 -1 2.5 -1 7.23 ...
 $ time_0        : chr [1:1240] "2" "0" "1" "0" ...
 $ game_result   : chr [1:1240] "0.0" "1.0" "0.0" "1.0" ...
# Show the character columns, i.e. the candidates for factor conversion.
# select(where()) replaces the superseded scoped verb select_if().
data %>% select(where(is.character))
# A tibble: 1,240 × 6
   conference school        Date       game_time           time_0 game_result
   <chr>      <chr>         <chr>      <chr>               <chr>  <chr>      
 1 Big10      Iowa          9/22/2018  1899-12-31 19:35:00 2      0.0        
 2 Big10      Iowa          10/20/2018 1899-12-31 11:00:00 0      1.0        
 3 Big10      Iowa          11/10/2018 1899-12-31 14:30:00 1      0.0        
 4 Big10      Iowa          11/23/2018 1899-12-31 11:00:00 0      1.0        
 5 Big12      Oklahoma      11/21/2015 1899-12-31 19:14:00 2      1.0        
 6 Pac12      Arizona State 9/21/2019  1899-12-31 19:00:00 2      0.0        
 7 Pac12      Arizona State 11/3/2018  1899-12-31 13:00:00 1      1.0        
 8 SEC        Florida       10/5/2013  1899-12-31 19:00:00 2      1.0        
 9 Big10      Ohio State    11/22/2014 1899-12-31 12:00:00 0      1.0        
10 Big10      Ohio State    9/28/2013  1899-12-31 20:00:00 2      1.0        
# ℹ 1,230 more rows
 # Convert the six character columns to factors so that summary() reports
 # level counts for them, then print the per-column summary.
 # NOTE(review): game_result contains a literal "N/A" string that becomes its
 # own factor level instead of a missing value -- consider recoding it to NA.
 data <- mutate(
   data,
   across(
     all_of(c("conference", "school", "Date", "game_time", "time_0", "game_result")),
     as.factor
   )
 )
 summary(data)
 conference                  school         year       tenure_year    
 ACC  :230   Michigan           :113   Min.   :2003   Min.   : 1.000  
 Big10:487   UNC                :100   1st Qu.:2012   1st Qu.: 2.000  
 Big12: 33   Ohio State         : 91   Median :2015   Median : 4.000  
 Pac12:221   Penn State         : 82   Mean   :2014   Mean   : 5.379  
 SEC  :269   Tennessee          : 77   3rd Qu.:2017   3rd Qu.: 8.000  
             Colorado University: 59   Max.   :2021   Max.   :17.000  
             (Other)            :718                                  
     S_Game       s_diversion       attendance            Date    
 Min.   :1.000   Min.   :0.0000   Min.   : 12000   9/15/2018: 11  
 1st Qu.:2.000   1st Qu.:0.1546   1st Qu.: 48566   10/3/2015:  8  
 Median :4.000   Median :0.2769   Median : 75714   11/7/2015:  8  
 Mean   :3.847   Mean   :0.3748   Mean   : 72662   9/1/2018 :  8  
 3rd Qu.:5.000   3rd Qu.:0.5701   3rd Qu.: 99053   9/23/2017:  8  
 Max.   :8.000   Max.   :0.9868   Max.   :115109   (Other)  :819  
 NA's   :136     NA's   :5        NA's   :2        NA's     :378  
    confCode       school_ID         year_0         tenure_0     
 Min.   :0.000   Min.   : 1.00   Min.   : 0.00   Min.   : 0.000  
 1st Qu.:1.000   1st Qu.: 9.00   1st Qu.: 9.00   1st Qu.: 1.000  
 Median :1.000   Median :18.00   Median :12.00   Median : 3.000  
 Mean   :1.848   Mean   :16.36   Mean   :11.25   Mean   : 4.353  
 3rd Qu.:3.000   3rd Qu.:23.00   3rd Qu.:14.00   3rd Qu.: 7.000  
 Max.   :4.000   Max.   :30.00   Max.   :16.00   Max.   :16.000  
                                 NA's   :15      NA's   :15      
  game_number      game_number2       game_0                    game_time  
 Min.   :  1.00   Min.   :1.000   Min.   :0.000   1899-12-31 12:00:00:301  
 1st Qu.: 11.00   1st Qu.:2.000   1st Qu.:1.000   1899-12-31 15:30:00:213  
 Median : 23.00   Median :4.000   Median :3.000   1899-12-31 19:30:00:105  
 Mean   : 29.79   Mean   :3.857   Mean   :2.874   1899-12-31 19:00:00: 97  
 3rd Qu.: 42.00   3rd Qu.:5.250   3rd Qu.:5.000   1899-12-31 12:30:00: 71  
 Max.   :113.00   Max.   :8.000   Max.   :7.000   1899-12-31 11:00:00: 60  
                                  NA's   :15      (Other)            :393  
    game_min      game_time_hr_0    time_0    game_result
 Min.   : 540.0   Min.   :-3.000   0   :462   0.0 :384   
 1st Qu.: 720.0   1st Qu.: 0.000   1   :392   1.0 :850   
 Median : 930.0   Median : 3.500   2   :371   N/A :  1   
 Mean   : 907.3   Mean   : 3.122   NA's: 15   NA's:  5   
 3rd Qu.:1080.0   3rd Qu.: 6.000                         
 Max.   :1230.0   Max.   : 8.500                         
                                                         

Note: the summary() output above is not very informative for categorical variables.

# Summarise every column with skimr: missing counts, completeness, factor
# level counts, and mean/sd/quantiles with inline histograms for numerics.
skim(data)
Data summary
Name data
Number of rows 1240
Number of columns 20
_______________________
Column type frequency:
factor 6
numeric 14
________________________
Group variables None

Variable type: factor

skim_variable n_missing complete_rate ordered n_unique top_counts
conference 0 1.00 FALSE 5 Big: 487, SEC: 269, ACC: 230, Pac: 221
school 0 1.00 FALSE 30 Mic: 113, UNC: 100, Ohi: 91, Pen: 82
Date 378 0.70 FALSE 379 9/1: 11, 10/: 8, 11/: 8, 9/1: 8
game_time 0 1.00 FALSE 50 189: 301, 189: 213, 189: 105, 189: 97
time_0 15 0.99 FALSE 3 0: 462, 1: 392, 2: 371
game_result 5 1.00 FALSE 3 1.0: 850, 0.0: 384, N/A: 1

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
year 0 1.00 2014.33 4.09 2003 2012.00 2015.00 2017.00 2021.00 ▁▂▂▇▅
tenure_year 0 1.00 5.38 3.99 1 2.00 4.00 8.00 17.00 ▇▃▂▁▁
S_Game 136 0.89 3.85 1.95 1 2.00 4.00 5.00 8.00 ▇▃▇▃▃
s_diversion 5 1.00 0.37 0.28 0 0.15 0.28 0.57 0.99 ▇▇▃▂▃
attendance 2 1.00 72662.00 27342.12 12000 48566.25 75713.50 99052.50 115109.00 ▂▇▅▆▇
confCode 0 1.00 1.85 1.47 0 1.00 1.00 3.00 4.00 ▃▇▁▃▅
school_ID 0 1.00 16.36 8.13 1 9.00 18.00 23.00 30.00 ▅▅▇▇▆
year_0 15 0.99 11.25 4.06 0 9.00 12.00 14.00 16.00 ▁▂▂▃▇
tenure_0 15 0.99 4.35 3.99 0 1.00 3.00 7.00 16.00 ▇▃▂▁▁
game_number 0 1.00 29.79 24.33 1 11.00 23.00 42.00 113.00 ▇▅▂▁▁
game_number2 0 1.00 3.86 1.96 1 2.00 4.00 5.25 8.00 ▇▅▇▃▃
game_0 15 0.99 2.87 1.96 0 1.00 3.00 5.00 7.00 ▇▅▇▃▃
game_min 0 1.00 907.33 178.15 540 720.00 930.00 1080.00 1230.00 ▁▇▆▃▆
game_time_hr_0 0 1.00 3.12 2.97 -3 0.00 3.50 6.00 8.50 ▁▇▆▃▆

Note: the skim() output above is not well suited to visualization.

# Build a per-variable summary table (stats, frequencies, graphs, missing
# counts) with summarytools; the variable-number and valid-count columns are
# switched off.
summary_df <- summarytools::dfSummary(
  data,
  style = "grid",
  plain.ascii = FALSE,
  graph.col = TRUE,
  valid.col = FALSE,
  varnumbers = FALSE
)

# Render the summary as an HTML table. Nothing in this call suppresses
# warnings; that would have to come from the chunk options.
print(
  summary_df,
  method = "render",
  table.classes = "table-condensed"
)

Data Frame Summary

data

Dimensions: 1240 x 20
Duplicates: 0
Variable Stats / Values Freqs (% of Valid) Graph Missing
conference [factor]
1. ACC
2. Big10
3. Big12
4. Pac12
5. SEC
230 ( 18.5% )
487 ( 39.3% )
33 ( 2.7% )
221 ( 17.8% )
269 ( 21.7% )
0 (0.0%)
school [factor]
1. Arizona State
2. Arkansas
3. Auburn
4. Clemson
5. Colorado University
6. Duke
7. Florida
8. Georgia
9. Georgia Tech
10. Iowa
[ 20 others ]
33 ( 2.7% )
6 ( 0.5% )
36 ( 2.9% )
28 ( 2.3% )
59 ( 4.8% )
38 ( 3.1% )
43 ( 3.5% )
36 ( 2.9% )
33 ( 2.7% )
20 ( 1.6% )
908 ( 73.2% )
0 (0.0%)
year [numeric]
Mean (sd) : 2014.3 (4.1)
min ≤ med ≤ max:
2003 ≤ 2015 ≤ 2021
IQR (CV) : 5 (0)
19 distinct values 0 (0.0%)
tenure_year [numeric]
Mean (sd) : 5.4 (4)
min ≤ med ≤ max:
1 ≤ 4 ≤ 17
IQR (CV) : 6 (0.7)
17 distinct values 0 (0.0%)
S_Game [numeric]
Mean (sd) : 3.8 (1.9)
min ≤ med ≤ max:
1 ≤ 4 ≤ 8
IQR (CV) : 3 (0.5)
1 : 167 ( 15.1% )
2 : 167 ( 15.1% )
3 : 167 ( 15.1% )
4 : 164 ( 14.9% )
5 : 165 ( 14.9% )
6 : 162 ( 14.7% )
7 : 104 ( 9.4% )
8 : 8 ( 0.7% )
136 (11.0%)
s_diversion [numeric]
Mean (sd) : 0.4 (0.3)
min ≤ med ≤ max:
0 ≤ 0.3 ≤ 1
IQR (CV) : 0.4 (0.7)
1171 distinct values 5 (0.4%)
attendance [numeric]
Mean (sd) : 72662 (27342.1)
min ≤ med ≤ max:
12000 ≤ 75713.5 ≤ 115109
IQR (CV) : 50486.2 (0.4)
1051 distinct values 2 (0.2%)
Date [factor]
1. 10/1/2011
2. 10/1/2016
3. 10/10/2009
4. 10/10/2015
5. 10/10/2020
6. 10/10/2022
7. 10/11/2003
8. 10/11/2008
9. 10/11/2012
10. 10/11/2014
[ 369 others ]
2 ( 0.2% )
6 ( 0.7% )
3 ( 0.3% )
3 ( 0.3% )
2 ( 0.2% )
1 ( 0.1% )
1 ( 0.1% )
1 ( 0.1% )
1 ( 0.1% )
2 ( 0.2% )
840 ( 97.4% )
378 (30.5%)
confCode [numeric]
Mean (sd) : 1.8 (1.5)
min ≤ med ≤ max:
0 ≤ 1 ≤ 4
IQR (CV) : 2 (0.8)
0 : 230 ( 18.5% )
1 : 487 ( 39.3% )
2 : 33 ( 2.7% )
3 : 221 ( 17.8% )
4 : 269 ( 21.7% )
0 (0.0%)
school_ID [numeric]
Mean (sd) : 16.4 (8.1)
min ≤ med ≤ max:
1 ≤ 18 ≤ 30
IQR (CV) : 14 (0.5)
30 distinct values 0 (0.0%)
year_0 [numeric]
Mean (sd) : 11.3 (4.1)
min ≤ med ≤ max:
0 ≤ 12 ≤ 16
IQR (CV) : 5 (0.4)
17 distinct values 15 (1.2%)
tenure_0 [numeric]
Mean (sd) : 4.4 (4)
min ≤ med ≤ max:
0 ≤ 3 ≤ 16
IQR (CV) : 6 (0.9)
17 distinct values 15 (1.2%)
game_number [numeric]
Mean (sd) : 29.8 (24.3)
min ≤ med ≤ max:
1 ≤ 23 ≤ 113
IQR (CV) : 31 (0.8)
113 distinct values 0 (0.0%)
game_number2 [numeric]
Mean (sd) : 3.9 (2)
min ≤ med ≤ max:
1 ≤ 4 ≤ 8
IQR (CV) : 3.2 (0.5)
1 : 186 ( 15.0% )
2 : 189 ( 15.2% )
3 : 188 ( 15.2% )
4 : 184 ( 14.8% )
5 : 183 ( 14.8% )
6 : 180 ( 14.5% )
7 : 116 ( 9.4% )
8 : 14 ( 1.1% )
0 (0.0%)
game_0 [numeric]
Mean (sd) : 2.9 (2)
min ≤ med ≤ max:
0 ≤ 3 ≤ 7
IQR (CV) : 4 (0.7)
0 : 182 ( 14.9% )
1 : 185 ( 15.1% )
2 : 184 ( 15.0% )
3 : 182 ( 14.9% )
4 : 182 ( 14.9% )
5 : 180 ( 14.7% )
6 : 116 ( 9.5% )
7 : 14 ( 1.1% )
15 (1.2%)
game_time [factor]
1. 1899-12-31 09:00:00
2. 1899-12-31 10:00:00
3. 1899-12-31 11:00:00
4. 1899-12-31 11:05:00
5. 1899-12-31 11:30:00
6. 1899-12-31 12:00:00
7. 1899-12-31 12:05:00
8. 1899-12-31 12:10:00
9. 1899-12-31 12:15:00
10. 1899-12-31 12:20:00
[ 40 others ]
1 ( 0.1% )
2 ( 0.2% )
60 ( 4.8% )
2 ( 0.2% )
7 ( 0.6% )
301 ( 24.3% )
1 ( 0.1% )
2 ( 0.2% )
1 ( 0.1% )
17 ( 1.4% )
846 ( 68.2% )
0 (0.0%)
game_min [numeric]
Mean (sd) : 907.3 (178.1)
min ≤ med ≤ max:
540 ≤ 930 ≤ 1230
IQR (CV) : 360 (0.2)
50 distinct values 0 (0.0%)
game_time_hr_0 [numeric]
Mean (sd) : 3.1 (3)
min ≤ med ≤ max:
-3 ≤ 3.5 ≤ 8.5
IQR (CV) : 6 (1)
50 distinct values 0 (0.0%)
time_0 [factor]
1. 0
2. 1
3. 2
462 ( 37.7% )
392 ( 32.0% )
371 ( 30.3% )
15 (1.2%)
game_result [factor]
1. 0.0
2. 1.0
3. N/A
384 ( 31.1% )
850 ( 68.8% )
1 ( 0.1% )
5 (0.4%)

Generated by summarytools 1.1.4 (R version 4.5.1)
2025-10-06

More Detailed Analysis

Character Variables

Conference

# Count the rows per conference; the result has columns Var1 (level) and
# Freq (count).
frequency_table_conference <- data.frame(table(data$conference))
frequency_table_conference
   Var1 Freq
1   ACC  230
2 Big10  487
3 Big12   33
4 Pac12  221
5   SEC  269
# Bar chart of the per-conference counts.
frequency_table_conference %>%
  ggplot(aes(x = Var1, y = Freq)) +
  geom_bar(fill = "black", stat = "identity") +
  labs(
    title = "Barplot for Bar Plot of Conference Categories",
    x = "Categories",
    y = "Frequency"
  ) +
  theme_classic() +
  theme(axis.text.x = element_text(size = 8, hjust = 1))

School

# Count the rows per school (Var1 = school, Freq = count).
frequency_table_school <- data.frame(table(data$school))
frequency_table_school
                  Var1 Freq
1        Arizona State   33
2             Arkansas    6
3               Auburn   36
4              Clemson   28
5  Colorado University   59
6                 Duke   38
7              Florida   43
8              Georgia   36
9         Georgia Tech   33
10                Iowa   20
11            Kentucky   34
12                 LSU   37
13            Maryland   29
14            Michigan  113
15      Michigan State   21
16           Minnesota   21
17            NC State   31
18          Ohio State   91
19            Oklahoma   24
20          Penn State   82
21              Purdue   55
22            Stanford   32
23           Tennessee   77
24                UCLA    7
25                 UNC  100
26             UoTexas    9
27                 USC   16
28          Washington   32
29    Washington State   42
30           Wisconsin   55
# Bar chart of the per-school counts; labels are rotated 90 degrees so the
# 30 school names do not overlap.
frequency_table_school %>%
  ggplot(aes(x = Var1, y = Freq)) +
  geom_bar(fill = "black", stat = "identity") +
  labs(
    title = "Barplot for Bar Plot of School Categories",
    x = "Categories",
    y = "Frequency"
  ) +
  theme_classic() +
  theme(axis.text.x = element_text(size = 8, angle = 90, hjust = 1))

Date

# Count the rows per distinct Date string (Var1 = date text, Freq = count).
# NOTE(review): Date holds mixed formats ("9/22/2018", "nov. 10",
# "Sept. 10, 2016"), so the same day can appear under several levels; the
# column needs parsing to a real date before these counts are meaningful.
frequency_table_Date <- data.frame(table(data$Date))
frequency_table_Date
              Var1 Freq
1        10/1/2011    2
2        10/1/2016    6
3       10/10/2009    3
4       10/10/2015    3
5       10/10/2020    2
6       10/10/2022    1
7       10/11/2003    1
8       10/11/2008    1
9       10/11/2012    1
10      10/11/2014    2
11      10/11/2017    1
12      10/12/2013    1
13      10/12/2019    4
14      10/13/2007    1
15      10/13/2018    2
16      10/14/2006    2
17      10/14/2017    5
18      10/15/2015    1
19      10/15/2016    5
20      10/15/2021    1
21      10/16/2004    1
22      10/16/2010    1
23      10/17/2009    1
24      10/17/2015    5
25      10/17/2016    1
26      10/18/2008    1
27      10/18/2014    3
28      10/18/2018    1
29      10/19/2013    1
30      10/19/2019    3
31       10/2/2010    1
32      10/20/2007    3
33      10/20/2012    3
34      10/20/2018    5
35      10/21/2006    2
36      10/21/2017    1
37      10/22/2005    1
38      10/22/2011    1
39      10/22/2016    5
40      10/23/2010    3
41      10/24/2009    2
42      10/24/2015    4
43      10/24/2020    1
44      10/25/2003    1
45      10/25/2008    3
46      10/25/2014    3
47      10/26/2013    3
48      10/26/2018    1
49      10/26/2019    3
50      10/27/2007    2
51      10/27/2012    1
52      10/27/2018    3
53      10/28/2006    1
54      10/28/2017    4
55      10/29/2011    2
56      10/29/2016    2
57      10/29/2022    1
58       10/3/2009    1
59       10/3/2015    8
60       10/3/2020    2
61      10/31/2009    4
62      10/31/2013    1
63      10/31/2015    2
64       10/4/2008    1
65       10/4/2014    3
66       10/5/2013    3
67       10/5/2019    3
68       10/6/2007    2
69       10/6/2012    1
70       10/6/2018    7
71       10/7/2017    6
72       10/7/2022    1
73       10/8/2015    1
74       10/8/2016    3
75       10/9/2004    1
76       10/9/2010    1
77       11/1/2003    1
78       11/1/2008    1
79       11/1/2014    3
80      11/10/2007    3
81      11/10/2012    2
82      11/10/2016    1
83      11/10/2018    7
84      11/11/2006    2
85      11/11/2017    2
86      11/11/2022    1
87      11/12/2011    1
88      11/12/2016    6
89      11/13/2004    1
90      11/13/2010    2
91      11/13/2015    1
92      11/14/2009    3
93      11/14/2015    5
94      11/14/2020    2
95      11/14/2022    1
96      11/15/2008    1
97      11/15/2014    2
98      11/16/2013    2
99      11/16/2019    2
100     11/17/2007    3
101     11/17/2012    1
102     11/17/2018    6
103     11/18/2006    1
104     11/18/2017    6
105     11/18/2018    1
106     11/19/2005    1
107     11/19/2011    1
108     11/19/2016    6
109      11/2/2019    3
110     11/20/2010    1
111     11/21/2009    2
112     11/21/2015    6
113     11/21/2020    1
114     11/21/2022    1
115     11/22/2003    1
116     11/22/2008    2
117     11/22/2014    4
118     11/23/2007    1
119     11/23/2012    1
120     11/23/2013    4
121     11/23/2018    2
122     11/23/2019    6
123     11/24/2007    1
124     11/24/2012    3
125     11/24/2018    3
126     11/25/2006    1
127     11/25/2016    3
128     11/25/2017    6
129     11/26/2011    1
130     11/26/2016    4
131     11/27/2009    1
132     11/27/2010    1
133     11/28/2015    6
134     11/29/2008    1
135     11/29/2014    3
136      11/3/2007    2
137      11/3/2012    3
138      11/3/2016    1
139      11/3/2018    6
140     11/30/2013    1
141     11/30/2019    5
142      11/4/2006    1
143      11/4/2011    1
144      11/4/2017    6
145      11/4/2022    1
146      11/5/2005    1
147      11/5/2011    1
148      11/5/2016    5
149      11/6/2004    1
150      11/7/2009    2
151      11/7/2015    8
152      11/8/2008    2
153      11/8/2014    1
154      11/9/2013    1
155      11/9/2019    5
156     12/18/2020    1
157     12/23/2019    1
158      12/5/2020    1
159      12/6/2014    1
160      12/6/2020    1
161       2/9/2016    1
162      5/11/2016    1
163      8/10/2016    1
164      8/17/2016    1
165      8/29/2019    1
166      8/30/2008    2
167      8/30/2014    2
168      8/31/2013    3
169      8/31/2017    1
170      8/31/2018    1
171      8/31/2019    5
172       9/1/2007    1
173       9/1/2016    1
174       9/1/2018    8
175      9/10/2005    1
176      9/10/2011    2
177      9/10/2016    7
178      9/11/2010    1
179      9/11/2016    1
180      9/12/2009    3
181      9/12/2015    6
182      9/13/2008    2
183      9/13/2014    5
184      9/14/2013    2
185      9/14/2019    4
186      9/15/2007    2
187      9/15/2012    2
188      9/15/2018   11
189      9/16/2006    2
190      9/16/2014    1
191      9/16/2016    1
192      9/16/2017    6
193      9/16/2022    1
194      9/17/2005    1
195      9/17/2016    5
196      9/18/2004    1
197      9/18/2010    2
198      9/18/2022    1
199      9/19/2009    2
200      9/19/2015    7
201       9/2/2006    3
202       9/2/2010    1
203       9/2/2015    1
204       9/2/2017    4
205      9/20/2003    1
206      9/20/2008    2
207      9/20/2014    2
208      9/20/2017    1
209      9/20/2019    1
210      9/21/2013    3
211      9/21/2019    5
212      9/22/2007    3
213      9/22/2012    3
214      9/22/2018    5
215      9/23/2006    3
216      9/23/2017    8
217      9/23/2022    1
218      9/24/2005    1
219      9/24/2011    1
220      9/24/2016    4
221      9/25/2010    1
222      9/26/2009    2
223      9/26/2015    5
224      9/26/2022    1
225      9/27/2003    1
226      9/27/2008    2
227      9/27/2014    1
228      9/28/2013    1
229      9/28/2018    1
230      9/28/2019    2
231      9/29/2007    1
232      9/29/2012    1
233      9/29/2017    1
234      9/29/2018    5
235       9/3/2011    1
236       9/3/2016    6
237      9/30/2006    1
238      9/30/2017    4
239       9/4/2016    1
240       9/5/2004    1
241       9/5/2009    2
242       9/5/2015    6
243       9/6/2003    1
244       9/6/2008    1
245       9/6/2009    1
246       9/6/2014    2
247       9/6/2019    1
248       9/7/2007    1
249       9/7/2013    3
250       9/7/2019    8
251       9/8/2007    3
252       9/8/2012    4
253       9/8/2018    7
254       9/9/2006    2
255       9/9/2017    6
256       9/9/2022    2
257        Aug. 29    1
258        aug. 30    2
259        Aug. 30    1
260        aug. 31    1
261         dec. 1    1
262        nov. 10    4
263        nov. 11    2
264        Nov. 11    1
265        nov. 12    2
266  Nov. 12, 2016    1
267        nov. 13    1
268        nov. 14    3
269        nov. 15    1
270        Nov. 15    1
271        nov. 16    3
272        Nov. 16    1
273        nov. 17    2
274        Nov. 17    1
275        nov. 18    3
276        Nov. 18    1
277        nov. 19    4
278         nov. 2    1
279         Nov. 2    1
280        nov. 20    1
281        Nov. 20    1
282        nov. 21    2
283        nov. 22    1
284        nov. 23    1
285        nov. 24    4
286        Nov. 24    1
287        nov. 25    3
288        nov. 26    1
289  Nov. 26, 2016    1
290        nov. 28    2
291        nov. 29    1
292        Nov. 29    1
293         nov. 3    3
294         Nov. 3    1
295        nov. 30    4
296         nov. 4    1
297         nov. 5    4
298   Nov. 5, 2016    1
299         nov. 6    1
300         nov. 7    2
301         nov. 8    2
302         nov. 9    3
303         oct. 1    3
304         Oct. 1    1
305   Oct. 1, 2016    1
306        oct. 10    2
307        oct. 11    1
308        oct. 12    1
309        Oct. 12    1
310        oct. 13    3
311        oct. 14    3
312        oct. 15    2
313  Oct. 15, 2016    1
314        oct. 17    3
315        oct. 18    1
316         oct. 2    1
317        oct. 20    1
318        Oct. 20    1
319        oct. 21    2
320        oct. 22    2
321        oct. 24    2
322        oct. 25    1
323        oct. 26    1
324        Oct. 26    1
325        oct. 27    1
326        oct. 28    2
327        Oct. 28    1
328        oct. 29    3
329         oct. 3    4
330        oct. 30    2
331        oct. 31    2
332         oct. 4    2
333         oct. 5    3
334         oct. 6    3
335         oct. 7    1
336         Oct. 7    1
337         oct. 8    4
338         oct. 9    2
339         Oct.18    1
340       sept 25.    1
341        sept. 1    3
342        Sept. 1    1
343       sept. 10    3
344 Sept. 10, 2016    1
345       sept. 12    4
346       Sept. 13    1
347       sept. 14    1
348       sept. 15    1
349       Sept. 15    1
350       sept. 16    3
351       sept. 17    5
352 Sept. 17, 2016    1
353       sept. 18    2
354       sept. 19    3
355        sept. 2    3
356        Sept. 2    1
357       sept. 20    1
358       Sept. 20    1
359       sept. 21    2
360       Sept. 21    1
361       sept. 22    5
362       sept. 23    2
363       Sept. 23    1
364       sept. 24    1
365       sept. 26    2
366       sept. 28    2
367       sept. 29    5
368       Sept. 29    1
369        sept. 3    3
370       sept. 30    2
371        sept. 4    1
372        sept. 5    2
373        sept. 6    1
374        sept. 7    5
375        Sept. 7    1
376        sept. 8    1
377        sept. 9    5
378        Sept. 9    1
379         sept.1    1
# Bar chart of the per-date-string counts; the axis text is shrunk to size 1
# and rotated because there are several hundred levels.
frequency_table_Date %>%
  ggplot(aes(x = Var1, y = Freq)) +
  geom_bar(fill = "black", stat = "identity") +
  labs(
    title = "Barplot for Bar Plot of Date Categories",
    x = "Categories",
    y = "Frequency"
  ) +
  theme_classic() +
  theme(axis.text.x = element_text(size = 1, angle = 90, hjust = 1))

Time Central 0

# Count the rows per time_0 level (Var1 = level, Freq = count).
frequency_table_time_0 <- data.frame(table(data$time_0))
frequency_table_time_0
  Var1 Freq
1    0  462
2    1  392
3    2  371
# Bar chart of the per-level counts of time_0.
frequency_table_time_0 %>%
  ggplot(aes(x = Var1, y = Freq)) +
  geom_bar(fill = "black", stat = "identity") +
  labs(
    title = "Barplot for Bar Plot of Time Central 0 Categories",
    x = "Categories",
    y = "Frequency"
  ) +
  theme_classic() +
  theme(axis.text.x = element_text(size = 8, hjust = 1))

Game Result

# Count the rows per game_result level (Var1 = level, Freq = count).
# NOTE(review): the literal "N/A" string is counted as a level of its own
# rather than as a missing value.
frequency_table_game_result <- data.frame(table(data$game_result))
frequency_table_game_result
  Var1 Freq
1  0.0  384
2  1.0  850
3  N/A    1
# Bar chart of the per-level counts of game_result.
frequency_table_game_result %>%
  ggplot(aes(x = Var1, y = Freq)) +
  geom_bar(fill = "black", stat = "identity") +
  labs(
    title = "Barplot for Bar Plot of Game Result Categories",
    x = "Categories",
    y = "Frequency"
  ) +
  theme_classic() +
  theme(axis.text.x = element_text(size = 8, hjust = 1))

Game time

# Count the rows per distinct kickoff time string (Var1 = time, Freq = count).
frequency_table_game_time <- data.frame(table(data$game_time))
frequency_table_game_time
                  Var1 Freq
1  1899-12-31 09:00:00    1
2  1899-12-31 10:00:00    2
3  1899-12-31 11:00:00   60
4  1899-12-31 11:05:00    2
5  1899-12-31 11:30:00    7
6  1899-12-31 12:00:00  301
7  1899-12-31 12:05:00    1
8  1899-12-31 12:10:00    2
9  1899-12-31 12:15:00    1
10 1899-12-31 12:20:00   17
11 1899-12-31 12:30:00   71
12 1899-12-31 12:45:00    1
13 1899-12-31 13:00:00   17
14 1899-12-31 13:30:00   16
15 1899-12-31 13:45:00    1
16 1899-12-31 14:00:00    8
17 1899-12-31 14:30:00   51
18 1899-12-31 14:35:00    3
19 1899-12-31 14:40:00    1
20 1899-12-31 15:00:00   24
21 1899-12-31 15:15:00    1
22 1899-12-31 15:30:00  213
23 1899-12-31 15:45:00    3
24 1899-12-31 16:00:00   46
25 1899-12-31 16:05:00    1
26 1899-12-31 16:20:00    1
27 1899-12-31 16:30:00   10
28 1899-12-31 17:00:00   21
29 1899-12-31 17:30:00    8
30 1899-12-31 17:40:00    1
31 1899-12-31 17:51:00    1
32 1899-12-31 18:00:00   54
33 1899-12-31 18:02:00    1
34 1899-12-31 18:05:00    1
35 1899-12-31 18:06:00    1
36 1899-12-31 18:15:00    1
37 1899-12-31 18:30:00   18
38 1899-12-31 18:40:00    1
39 1899-12-31 18:45:00    1
40 1899-12-31 18:50:00    2
41 1899-12-31 19:00:00   97
42 1899-12-31 19:14:00    1
43 1899-12-31 19:15:00    3
44 1899-12-31 19:30:00  105
45 1899-12-31 19:35:00    1
46 1899-12-31 19:45:00    9
47 1899-12-31 20:00:00   47
48 1899-12-31 20:10:00    1
49 1899-12-31 20:20:00    2
50 1899-12-31 20:30:00    1
# Bar chart of the per-kickoff-time counts; labels are small and rotated to
# fit the 50 distinct times.
frequency_table_game_time %>%
  ggplot(aes(x = Var1, y = Freq)) +
  geom_bar(fill = "black", stat = "identity") +
  labs(
    title = "Barplot for Bar Plot of Game time Categories",
    x = "Categories",
    y = "Frequency"
  ) +
  theme_classic() +
  theme(axis.text.x = element_text(size = 5, angle = 90, hjust = 1))

Numerical Variables

Year

# Histogram of year with one bin per calendar year.
data %>%
  ggplot(aes(x = year)) +
  geom_histogram(fill = "black", color = "black", binwidth = 1) +
  labs(
    title = "Histogram of Year",
    x = "Variable Value",
    y = "Frequency"
  ) +
  theme_classic()

# Exact per-year row counts (Var1 = year, Freq = count).
frequency_table_year <- data.frame(table(data$year))
frequency_table_year
   Var1 Freq
1  2003    7
2  2004   18
3  2005   19
4  2006   35
5  2007   42
6  2008   36
7  2009   50
8  2010   35
9  2011   35
10 2012   60
11 2013   64
12 2014   85
13 2015  142
14 2016  151
15 2017  153
16 2018  160
17 2019  133
18 2020   11
19 2021    4
# Bar chart of the per-year counts, with rotated year labels.
frequency_table_year %>%
  ggplot(aes(x = Var1, y = Freq)) +
  geom_bar(fill = "black", stat = "identity") +
  labs(
    title = "Barplot for Bar Plot of Year",
    x = "Categories",
    y = "Frequency"
  ) +
  theme_classic() +
  theme(axis.text.x = element_text(size = 8, angle = 90, hjust = 1))

Tenure Year

# Histogram of tenure_year with one bin per year of tenure.
data %>%
  ggplot(aes(x = tenure_year)) +
  geom_histogram(fill = "black", color = "black", binwidth = 1) +
  labs(
    title = "Histogram of Tenure Year",
    x = "Variable Value",
    y = "Frequency"
  ) +
  theme_classic()

# Exact row counts per tenure year (Var1 = tenure year, Freq = count).
frequency_table_tenure_year <- data.frame(table(data$tenure_year))
frequency_table_tenure_year
   Var1 Freq
1     1  182
2     2  165
3     3  166
4     4  147
5     5  130
6     6   78
7     7   59
8     8   49
9     9   40
10   10   49
11   11   39
12   12   34
13   13   34
14   14   28
15   15   18
16   16   16
17   17    6
# Bar chart of the per-tenure-year counts, with rotated labels.
frequency_table_tenure_year %>%
  ggplot(aes(x = Var1, y = Freq)) +
  geom_bar(fill = "black", stat = "identity") +
  labs(
    title = "Barplot for Bar Plot of Tenure Year",
    x = "Categories",
    y = "Frequency"
  ) +
  theme_classic() +
  theme(axis.text.x = element_text(size = 8, angle = 90, hjust = 1))

Season Game

# Base-graphics histogram of the season game number.
hist(data$S_Game, main = "Histogram of Season Game", xlab = "Variable Value", ylab = "Frequency")

# ggplot2 version: one bin per game number, axis ticks at 1..10.
# The visible x range is set with coord_cartesian() instead of scale limits.
# Scale limits of c(1, 10) censor any bar whose edges fall outside them; with
# unit-wide bins centred on integers the bar for game 1 starts at 0.5, so it
# was dropped from the figure (the "Removed 2 rows ... geom_bar()" warning).
# Zooming keeps every bar.
ggplot(data = data, aes(x = S_Game)) +
  geom_histogram(binwidth = 1, fill = "black", color = "black") +
  labs(title = "Histogram of Season Game", x = "Variable Value", y = "Frequency") +
  theme_classic() +
  scale_x_continuous(breaks = seq(1, 10)) +
  coord_cartesian(xlim = c(0.5, 10.5))
Warning: Removed 136 rows containing non-finite outside the scale range
(`stat_bin()`).
Warning: Removed 2 rows containing missing values or values outside the scale range
(`geom_bar()`).

Stadium Waste Diversion

# Base-graphics histogram of the stadium waste diversion rate.
hist(data$s_diversion, xlab = "Variable Value", ylab = "Frequency", main = "Histogram of Stadium Waste Diversion")

# ggplot2 version on the 0-1 range.
# Bins are 0.05 wide and anchored at 0 so no bar straddles the lower bound,
# and the range is set with coord_cartesian() rather than scale limits. With
# scale limits of c(0, 1) and default bins, the edge bars (including the one
# holding the zero-diversion games) extended past the limits and were
# removed from the figure.
ggplot(data = data, aes(x = s_diversion)) +
  geom_histogram(binwidth = 0.05, boundary = 0, fill = "black", color = "black") +
  labs(title = "Histogram of Stadium Waste Diversion", x = "Variable Value", y = "Frequency") +
  theme_classic() +
  coord_cartesian(xlim = c(0, 1))
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Warning: Removed 5 rows containing non-finite outside the scale range
(`stat_bin()`).
Warning: Removed 2 rows containing missing values or values outside the scale range
(`geom_bar()`).

Attendance

# Base-graphics histogram of attendance.
hist(data$attendance, main = "Histogram of Attendance", xlab = "Variable Value", ylab = "Frequency")

# ggplot2 version. bins = 30 is the value stat_bin() used implicitly; stating
# it keeps the figure unchanged and silences the "Pick better value" message.
ggplot(data = data, aes(x = attendance)) +
  geom_histogram(bins = 30, fill = "black", color = "black") +
  labs(title = "Histogram of Attendance", x = "Variable Value", y = "Frequency") +
  theme_classic()
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Warning: Removed 2 rows containing non-finite outside the scale range
(`stat_bin()`).

Conference Code

# Histogram of the numeric conference code, one bin per code.
# The title previously read "Histogram of Attendance" (copied from the
# attendance plot); it now names the variable actually plotted.
ggplot(data = data, aes(x = confCode)) +
  geom_histogram(binwidth = 1, fill = "black", color = "black") +
  labs(title = "Histogram of Conference Code", x = "Variable Value", y = "Frequency") +
  theme_classic()

# Exact row counts per conference code (Var1 = code, Freq = count).
frequency_table_conference_code <- table(data$confCode) %>% data.frame()
frequency_table_conference_code
  Var1 Freq
1    0  230
2    1  487
3    2   33
4    3  221
5    4  269
# Bar chart of the per-conference-code counts.
# The title previously said "Tenure Year Central 0" (copy-paste slip); it now
# names the variable actually plotted.
ggplot(frequency_table_conference_code, aes(x = Var1, y = Freq)) +
  geom_bar(stat = "identity", fill = "black") +
  labs(
    title = "Barplot for Bar Plot of Conference Code",
    x = "Categories",
    y = "Frequency"
  ) +
  theme_classic() +
  theme(axis.text.x = element_text(hjust = 1, size = 8))

School ID

# Histogram of the numeric school ID, one bin per ID.
data %>%
  ggplot(aes(x = school_ID)) +
  geom_histogram(fill = "black", color = "black", binwidth = 1) +
  labs(
    title = "Histogram of School ID",
    x = "Variable Value",
    y = "Frequency"
  ) +
  theme_classic()

# Exact row counts per school ID (Var1 = ID, Freq = count).
frequency_table_school_ID <- data.frame(table(data$school_ID))
frequency_table_school_ID
   Var1 Freq
1     1   33
2     2    6
3     3   36
4     4   28
5     5   59
6     6   38
7     7   43
8     8   36
9     9   33
10   10   20
11   11   34
12   12   37
13   13   29
14   14  113
15   15   21
16   16   21
17   17   31
18   18   91
19   19   24
20   20   82
21   21   55
22   22   32
23   23   77
24   24    7
25   25  100
26   26    9
27   27   16
28   28   32
29   29   42
30   30   55
# Bar chart of the per-school-ID counts.
# The title previously said "Tenure Year Central 0" (copy-paste slip); it now
# names the variable actually plotted.
ggplot(frequency_table_school_ID, aes(x = Var1, y = Freq)) +
  geom_bar(stat = "identity", fill = "black") +
  labs(
    title = "Barplot for Bar Plot of School ID",
    x = "Categories",
    y = "Frequency"
  ) +
  theme_classic() +
  theme(axis.text.x = element_text(hjust = 1, size = 8))

Year Central 0

# Histogram of year_0 with one bin per value; rows with a missing year_0 are
# dropped by stat_bin() with a warning.
data %>%
  ggplot(aes(x = year_0)) +
  geom_histogram(fill = "black", color = "black", binwidth = 1) +
  labs(
    title = "Histogram of Year Central 0",
    x = "Variable Value",
    y = "Frequency"
  ) +
  theme_classic()
Warning: Removed 15 rows containing non-finite outside the scale range
(`stat_bin()`).

# Exact row counts per year_0 value (Var1 = value, Freq = count).
frequency_table_year_0 <- data.frame(table(data$year_0))
frequency_table_year_0
   Var1 Freq
1     0    7
2     1   18
3     2   19
4     3   35
5     4   42
6     5   36
7     6   50
8     7   35
9     8   35
10    9   60
11   10   64
12   11   85
13   12  142
14   13  151
15   14  153
16   15  160
17   16  133
# Bar chart of the per-value counts of year_0.
# The title previously said "Tenure Year Central 0" (copy-paste slip); it now
# names the variable actually plotted.
ggplot(frequency_table_year_0, aes(x = Var1, y = Freq)) +
  geom_bar(stat = "identity", fill = "black") +
  labs(
    title = "Barplot for Bar Plot of Year Central 0",
    x = "Categories",
    y = "Frequency"
  ) +
  theme_classic() +
  theme(axis.text.x = element_text(hjust = 1, size = 8))

Tenure Year Central 0

# Histogram of tenure_0 with one bin per value; rows with a missing tenure_0
# are dropped by stat_bin() with a warning.
data %>%
  ggplot(aes(x = tenure_0)) +
  geom_histogram(fill = "black", color = "black", binwidth = 1) +
  labs(
    title = "Histogram of Tenure Year Central 0",
    x = "Variable Value",
    y = "Frequency"
  ) +
  theme_classic()
Warning: Removed 15 rows containing non-finite outside the scale range
(`stat_bin()`).

# Exact row counts per tenure_0 value (Var1 = value, Freq = count).
frequency_table_tenure_year_0 <- data.frame(table(data$tenure_0))
frequency_table_tenure_year_0
   Var1 Freq
1     0  182
2     1  165
3     2  166
4     3  147
5     4  125
6     5   72
7     6   59
8     7   49
9     8   40
10    9   49
11   10   39
12   11   34
13   12   30
14   13   28
15   14   18
16   15   16
17   16    6
# Bar chart of the per-value counts of tenure_0.
frequency_table_tenure_year_0 %>%
  ggplot(aes(x = Var1, y = Freq)) +
  geom_bar(fill = "black", stat = "identity") +
  labs(
    title = "Barplot for Bar Plot of Tenure Year Central 0",
    x = "Categories",
    y = "Frequency"
  ) +
  theme_classic() +
  theme(axis.text.x = element_text(size = 8, hjust = 1))

Game Number

# Base-graphics histogram of the game number.
hist(data$game_number, main = "Histogram of Game Number", xlab = "Variable Value", ylab = "Frequency")

# ggplot2 version over the 1-113 range.
# bins = 30 is the value stat_bin() used implicitly. The x range is set with
# coord_cartesian() rather than scale limits: with limits of c(1, 113) the
# first and last bars extend past the limits and were removed from the figure
# (the "Removed 2 rows ... geom_bar()" warning). Zooming keeps them.
ggplot(data = data, aes(x = game_number)) +
  geom_histogram(bins = 30, fill = "black", color = "black") +
  labs(title = "Histogram of Game Number", x = "Variable Value", y = "Frequency") +
  theme_classic() +
  coord_cartesian(xlim = c(1, 113))
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Warning: Removed 2 rows containing missing values or values outside the scale range
(`geom_bar()`).

Game Number 2

# Base-graphics histogram of game_number2.
hist(
  data$game_number2,
  xlab = "Variable Value",
  ylab = "Frequency",
  main = "Histogram of Game Number 2"
)

# ggplot2 version with one bin per value.
data %>%
  ggplot(aes(x = game_number2)) +
  geom_histogram(fill = "black", color = "black", binwidth = 1) +
  labs(
    title = "Histogram of Game Number 2",
    x = "Variable Value",
    y = "Frequency"
  ) +
  theme_classic()

Game central 0

# Base-graphics histogram of game_0.
hist(
  data$game_0,
  xlab = "Variable Value",
  ylab = "Frequency",
  main = "Histogram of Game central 0"
)

# ggplot2 version with one bin per value; rows with a missing game_0 are
# dropped by stat_bin() with a warning.
data %>%
  ggplot(aes(x = game_0)) +
  geom_histogram(fill = "black", color = "black", binwidth = 1) +
  labs(
    title = "Histogram of Game central 0",
    x = "Variable Value",
    y = "Frequency"
  ) +
  theme_classic()
Warning: Removed 15 rows containing non-finite outside the scale range
(`stat_bin()`).

Game Minutes

# Base-graphics histogram of game_min.
hist(data$game_min, main = "Histogram of Game Minutes", xlab = "Variable Value", ylab = "Frequency")

# ggplot2 version. bins = 30 is the value stat_bin() used implicitly; stating
# it keeps the figure unchanged and silences the "Pick better value" message.
ggplot(data = data, aes(x = game_min)) +
  geom_histogram(bins = 30, fill = "black", color = "black") +
  labs(title = "Histogram of Game Minutes", x = "Variable Value", y = "Frequency") +
  theme_classic()
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Game time hours 0

# Base-graphics histogram of game_time_hr_0.
hist(data$game_time_hr_0, main = "Histogram of Game time hours 0", xlab = "Variable Value", ylab = "Frequency")

# ggplot2 version. bins = 30 is the value stat_bin() used implicitly; stating
# it keeps the figure unchanged and silences the "Pick better value" message.
ggplot(data = data, aes(x = game_time_hr_0)) +
  geom_histogram(bins = 30, fill = "black", color = "black") +
  labs(title = "Histogram of Game time hours 0", x = "Variable Value", y = "Frequency") +
  theme_classic()
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.