Load Packages
library (skimr)
library (summarytools)
Warning in fun(libname, pkgname): couldn't connect to display ":0"
library (readxl)
library ("readr" )
library ("tidyverse" )
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr 1.1.4 ✔ purrr 1.1.0
✔ forcats 1.0.1 ✔ stringr 1.5.2
✔ ggplot2 4.0.0 ✔ tibble 3.3.0
✔ lubridate 1.9.4 ✔ tidyr 1.3.1
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag() masks stats::lag()
✖ tibble::view() masks summarytools::view()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
Import the Data
Warning: Expecting numeric in AK146 / R146C37: got a date
Warning: Expecting numeric in AL146 / R146C38: got a date
Warning: Expecting numeric in AK147 / R147C37: got a date
Warning: Expecting numeric in AL147 / R147C38: got a date
Warning: Expecting numeric in AK148 / R148C37: got a date
Warning: Expecting numeric in AL148 / R148C38: got a date
Warning: Expecting numeric in AK341 / R341C37: got a date
Warning: Expecting numeric in AL341 / R341C38: got a date
Warning: Expecting numeric in AK342 / R342C37: got a date
Warning: Expecting numeric in AL342 / R342C38: got a date
Warning: Expecting numeric in AK343 / R343C37: got a date
Warning: Expecting numeric in AL343 / R343C38: got a date
Warning: Expecting numeric in AK373 / R373C37: got a date
Warning: Expecting numeric in AL373 / R373C38: got a date
Warning: Expecting numeric in AK374 / R374C37: got a date
Warning: Expecting numeric in AL374 / R374C38: got a date
Warning: Expecting numeric in AK375 / R375C37: got a date
Warning: Expecting numeric in AL375 / R375C38: got a date
Warning: Expecting numeric in AK376 / R376C37: got a date
Warning: Expecting numeric in AL376 / R376C38: got a date
Warning: Expecting numeric in AK377 / R377C37: got a date
Warning: Expecting numeric in AL377 / R377C38: got a date
Warning: Expecting numeric in AK865 / R865C37: got a date
Warning: Expecting numeric in AL865 / R865C38: got a date
Warning: Expecting numeric in AK866 / R866C37: got a date
Warning: Expecting numeric in AL866 / R866C38: got a date
Warning: Expecting numeric in AK867 / R867C37: got a date
Warning: Expecting numeric in AL867 / R867C38: got a date
Warning: Expecting numeric in AK868 / R868C37: got a date
Warning: Expecting numeric in AL868 / R868C38: got a date
# Keep only the columns of the older data set that are used below.
# NOTE(review): the backticked tenure column name is reproduced exactly as it
# appears in this document, including the padding spaces - confirm against
# the actual column name in the workbook.
Collected_Data_old_select <- select(
  Collected_Data_old,
  Conference, School, Year, ` Tenure Year `,
  S_Game, S_Diversion, Attendance, Date
)

# Lower-case the column names (S_Game and Date keep their names), treat the
# spreadsheet error marker "#DIV/0!" as missing, and coerce the diversion and
# attendance columns to numeric. Any other non-numeric text becomes NA with a
# coercion warning.
Collected_Data_old_renamed <- Collected_Data_old_select %>%
  rename(
    conference = Conference,
    school = School,
    year = Year,
    tenure_year = ` Tenure Year `,
    s_diversion = S_Diversion,
    attendance = Attendance
  ) %>%
  mutate(
    s_diversion = as.numeric(na_if(s_diversion, "#DIV/0!")),
    attendance = as.numeric(attendance)
  )
Warning: There was 1 warning in `mutate()`.
ℹ In argument: `s_diversion = as.numeric(s_diversion)`.
Caused by warning:
! NAs introduced by coercion
Warning: There was 1 warning in `mutate()`.
ℹ In argument: `attendance = as.numeric(attendance)`.
Caused by warning:
! NAs introduced by coercion
# Inspect the structure (column names and types) of the renamed older data set.
str (Collected_Data_old_renamed)
tibble [1,396 × 8] (S3: tbl_df/tbl/data.frame)
$ conference : chr [1:1396] "Big10" "Big10" "Big10" "Big10" ...
$ school : chr [1:1396] "Iowa" "Iowa" "Iowa" "Iowa" ...
$ year : num [1:1396] 2018 2018 2018 2018 2015 ...
$ tenure_year: num [1:1396] 3 3 3 3 4 5 4 1 8 7 ...
$ S_Game : num [1:1396] 4 5 6 7 6 3 5 3 6 4 ...
$ s_diversion: num [1:1396] NA NA NA NA NA ...
$ attendance : num [1:1396] 69250 69250 66493 65299 85821 ...
$ Date : chr [1:1396] "9/22/2018" "10/20/2018" "11/10/2018" "11/23/2018" ...
# Build the cleaned version of the newer data set: keep the analysis columns,
# turn spreadsheet placeholders into real missing values, and coerce the
# diversion and attendance columns to numeric.
Collected_Data_clean <- Collected_Data %>%
  select(
    conference, confCode, school, school_ID,
    year, year_0, tenure_year, tenure_0,
    game_number, game_number2, game_0,
    s_diversion, attendance,
    game_time, game_min, game_time_hr_0, time_0,
    game_result, Date
  ) %>%
  mutate(
    # "#DIV/0!" is an Excel error marker, not a measurement.
    s_diversion = as.numeric(na_if(s_diversion, "#DIV/0!")),
    # Non-numeric attendance entries become NA (with a coercion warning).
    attendance = as.numeric(attendance),
    # The literal text "N/A" would otherwise be counted as a third result
    # category in the frequency tables and plots.
    game_result = na_if(game_result, "N/A")
  )
Warning: There was 1 warning in `mutate()`.
ℹ In argument: `attendance = as.numeric(attendance)`.
Caused by warning:
! NAs introduced by coercion
# Preview the first rows of the cleaned data set.
head (Collected_Data_clean)
# A tibble: 6 × 19
conference confCode school school_ID year year_0 tenure_year tenure_0
<chr> <dbl> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
1 Big10 1 Iowa 10 2018 15 3 2
2 Big10 1 Iowa 10 2018 15 3 2
3 Big10 1 Iowa 10 2018 15 3 2
4 Big10 1 Iowa 10 2018 15 3 2
5 Big12 2 Oklahoma 19 2015 12 4 3
6 Big10 1 Ohio State 18 2014 11 8 7
# ℹ 11 more variables: game_number <dbl>, game_number2 <dbl>, game_0 <dbl>,
# s_diversion <dbl>, attendance <dbl>, game_time <dttm>, game_min <dbl>,
# game_time_hr_0 <dbl>, time_0 <chr>, game_result <chr>, Date <chr>
# Inspect the structure (column names and types) of the cleaned data set.
str (Collected_Data_clean)
tibble [1,240 × 19] (S3: tbl_df/tbl/data.frame)
$ conference : chr [1:1240] "Big10" "Big10" "Big10" "Big10" ...
$ confCode : num [1:1240] 1 1 1 1 2 1 1 1 1 1 ...
$ school : chr [1:1240] "Iowa" "Iowa" "Iowa" "Iowa" ...
$ school_ID : num [1:1240] 10 10 10 10 19 18 18 18 18 18 ...
$ year : num [1:1240] 2018 2018 2018 2018 2015 ...
$ year_0 : num [1:1240] 15 15 15 15 12 11 10 9 10 9 ...
$ tenure_year : num [1:1240] 3 3 3 3 4 8 7 6 7 6 ...
$ tenure_0 : num [1:1240] 2 2 2 2 3 7 6 5 6 5 ...
$ game_number : num [1:1240] 17 18 19 20 24 55 47 42 49 43 ...
$ game_number2 : num [1:1240] 4 5 6 7 6 6 4 7 7 8 ...
$ game_0 : num [1:1240] 3 4 5 6 5 5 3 6 6 7 ...
$ s_diversion : num [1:1240] NA NA NA NA NA ...
$ attendance : num [1:1240] 69250 69250 66493 65299 85821 ...
$ game_time : POSIXct[1:1240], format: "1899-12-31 19:35:00" "1899-12-31 11:00:00" ...
$ game_min : num [1:1240] 1175 660 870 660 1154 ...
$ game_time_hr_0: num [1:1240] 7.58 -1 2.5 -1 7.23 ...
$ time_0 : chr [1:1240] "2" "0" "1" "0" ...
$ game_result : chr [1:1240] "0.0" "1.0" "0.0" "1.0" ...
$ Date : chr [1:1240] "9/22/2018" "10/20/2018" "11/10/2018" "11/23/2018" ...
# Combine the older and newer data sets, keeping rows from both.
# The join keys are spelled out so the join no longer depends on whichever
# columns happen to share a name (and no longer prints the "Joining with"
# message). They are exactly the columns dplyr previously inferred.
# NOTE(review): s_diversion and attendance are measured values; matching on
# them means any small discrepancy between the two files yields separate rows
# instead of a match - confirm this is intended.
data <- full_join(
  Collected_Data_old_renamed,
  Collected_Data_clean,
  by = join_by(
    conference, school, year, tenure_year,
    s_diversion, attendance, Date
  )
)
Joining with `by = join_by(conference, school, year, tenure_year, s_diversion,
attendance, Date)`
Descriptive Analysis
tibble [1,532 × 20] (S3: tbl_df/tbl/data.frame)
$ conference : chr [1:1532] "Big10" "Big10" "Big10" "Big10" ...
$ school : chr [1:1532] "Iowa" "Iowa" "Iowa" "Iowa" ...
$ year : num [1:1532] 2018 2018 2018 2018 2015 ...
$ tenure_year : num [1:1532] 3 3 3 3 4 5 4 1 8 7 ...
$ S_Game : num [1:1532] 4 5 6 7 6 3 5 3 6 4 ...
$ s_diversion : num [1:1532] NA NA NA NA NA ...
$ attendance : num [1:1532] 69250 69250 66493 65299 85821 ...
$ Date : chr [1:1532] "9/22/2018" "10/20/2018" "11/10/2018" "11/23/2018" ...
$ confCode : num [1:1532] 1 1 1 1 2 3 3 4 1 1 ...
$ school_ID : num [1:1532] 10 10 10 10 19 1 1 7 18 18 ...
$ year_0 : num [1:1532] 15 15 15 15 12 16 15 10 11 10 ...
$ tenure_0 : num [1:1532] 2 2 2 2 3 4 3 0 7 6 ...
$ game_number : num [1:1532] 17 18 19 20 24 29 25 3 55 47 ...
$ game_number2 : num [1:1532] 4 5 6 7 6 3 5 3 6 4 ...
$ game_0 : num [1:1532] 3 4 5 6 5 2 4 2 5 3 ...
$ game_time : POSIXct[1:1532], format: "1899-12-31 19:35:00" "1899-12-31 11:00:00" ...
$ game_min : num [1:1532] 1175 660 870 660 1154 ...
$ game_time_hr_0: num [1:1532] 7.58 -1 2.5 -1 7.23 ...
$ time_0 : chr [1:1532] "2" "0" "1" "0" ...
$ game_result : chr [1:1532] "0.0" "1.0" "0.0" "1.0" ...
conference school year tenure_year
Length:1532 Length:1532 Min. :2003 Min. : 1.00
Class :character Class :character 1st Qu.:2013 1st Qu.: 3.00
Mode :character Mode :character Median :2016 Median : 5.00
Mean :2015 Mean : 6.03
3rd Qu.:2018 3rd Qu.: 9.00
Max. :2024 Max. :20.00
S_Game s_diversion attendance Date
Min. :1.000 Min. :0.0000 Min. : 1275 Length:1532
1st Qu.:2.000 1st Qu.:0.1636 1st Qu.: 49118 Class :character
Median :4.000 Median :0.3039 Median : 80050 Mode :character
Mean :3.873 Mean :0.3999 Mean : 74007
3rd Qu.:6.000 3rd Qu.:0.6617 3rd Qu.:101020
Max. :9.000 Max. :0.9868 Max. :115109
NA's :136 NA's :19 NA's :2
confCode school_ID year_0 tenure_0
Min. :0.000 Min. : 1.00 Min. : 0.00 Min. : 0.000
1st Qu.:1.000 1st Qu.: 9.00 1st Qu.: 9.00 1st Qu.: 1.000
Median :1.000 Median :18.00 Median :12.00 Median : 3.000
Mean :1.882 Mean :16.37 Mean :11.25 Mean : 4.353
3rd Qu.:3.000 3rd Qu.:23.00 3rd Qu.:14.00 3rd Qu.: 7.000
Max. :4.000 Max. :30.00 Max. :16.00 Max. :16.000
NA's :307 NA's :307 NA's :307 NA's :307
game_number game_number2 game_0 game_time
Min. : 1.00 Min. :1.000 Min. :0.000 Min. :1899-12-31 09:00:00
1st Qu.: 11.00 1st Qu.:2.000 1st Qu.:1.000 1st Qu.:1899-12-31 12:00:00
Median : 23.00 Median :4.000 Median :3.000 Median :1899-12-31 15:30:00
Mean : 29.79 Mean :3.857 Mean :2.874 Mean :1899-12-31 15:07:19
3rd Qu.: 42.00 3rd Qu.:5.250 3rd Qu.:5.000 3rd Qu.:1899-12-31 18:00:00
Max. :113.00 Max. :8.000 Max. :7.000 Max. :1899-12-31 20:30:00
NA's :292 NA's :292 NA's :307 NA's :292
game_min game_time_hr_0 time_0 game_result
Min. : 540.0 Min. :-3.000 Length:1532 Length:1532
1st Qu.: 720.0 1st Qu.: 0.000 Class :character Class :character
Median : 930.0 Median : 3.500 Mode :character Mode :character
Mean : 907.3 Mean : 3.122
3rd Qu.:1080.0 3rd Qu.: 6.000
Max. :1230.0 Max. : 8.500
NA's :292 NA's :292
# Prepare the combined data for categorical summaries: store the kick-off
# time as text, then convert every character column to a factor.
# across(where()) replaces the superseded mutate_if().
data <- data %>%
  mutate(game_time = as.character(game_time)) %>%
  mutate(across(where(is.character), factor))
Not good for categorical variables
Data summary
Name
data
Number of rows
1532
Number of columns
20
_______________________
Column type frequency:
factor
6
numeric
14
________________________
Group variables
None
Variable type: factor
conference
0
1.00
FALSE
5
Big: 604, SEC: 365, Pac: 255, ACC: 230
school
0
1.00
FALSE
31
Mic: 147, Ohi: 123, Ten: 123, UNC: 100
Date
384
0.75
FALSE
456
9/1: 17, 9/1: 12, 9/7: 12, 9/8: 12
game_time
292
0.81
FALSE
50
189: 301, 189: 213, 189: 105, 189: 97
time_0
307
0.80
FALSE
3
0: 462, 1: 392, 2: 371
game_result
297
0.81
FALSE
3
1.0: 850, 0.0: 384, N/A: 1
Variable type: numeric
year
0
1.00
2015.29
4.46
2003
2013.00
2016.0
2018.00
2024.00
▂▂▅▇▂
tenure_year
0
1.00
6.03
4.56
1
3.00
5.0
9.00
20.00
▇▅▂▂▁
S_Game
136
0.91
3.87
1.98
1
2.00
4.0
6.00
9.00
▇▇▃▆▁
s_diversion
19
0.99
0.40
0.28
0
0.16
0.3
0.66
0.99
▇▇▃▃▃
attendance
2
1.00
74006.59
27451.71
1275
49118.50
80049.5
101019.50
115109.00
▁▅▅▅▇
confCode
307
0.80
1.88
1.44
0
1.00
1.0
3.00
4.00
▃▇▁▃▅
school_ID
307
0.80
16.37
8.14
1
9.00
18.0
23.00
30.00
▅▅▇▇▆
year_0
307
0.80
11.25
4.06
0
9.00
12.0
14.00
16.00
▁▂▂▃▇
tenure_0
307
0.80
4.35
3.99
0
1.00
3.0
7.00
16.00
▇▃▂▁▁
game_number
292
0.81
29.79
24.33
1
11.00
23.0
42.00
113.00
▇▅▂▁▁
game_number2
292
0.81
3.86
1.96
1
2.00
4.0
5.25
8.00
▇▅▇▃▃
game_0
307
0.80
2.87
1.96
0
1.00
3.0
5.00
7.00
▇▅▇▃▃
game_min
292
0.81
907.33
178.15
540
720.00
930.0
1080.00
1230.00
▁▇▆▃▆
game_time_hr_0
292
0.81
3.12
2.97
-3
0.00
3.5
6.00
8.50
▁▇▆▃▆
Not good for visualization
# Build a per-variable summary of the combined data with summarytools:
# grid style, graph column shown, variable numbers and valid-count column
# hidden.
summary_df <- summarytools::dfSummary(
  data,
  style = "grid",
  plain.ascii = FALSE,
  graph.col = TRUE,
  valid.col = FALSE,
  varnumbers = FALSE
)

# Print the summary table using the "render" method and a condensed table
# class.
print(summary_df, method = "render", table.classes = "table-condensed")
More Detailed Analysis
Character Variables
Conference
# Count the rows per conference and store the counts as a data frame with
# columns Var1 (category) and Freq (count), then print it.
frequency_table_conference <- data.frame(table(data$conference))
frequency_table_conference
Var1 Freq
1 ACC 230
2 Big10 604
3 Big12 78
4 Pac12 255
5 SEC 365
# Bar chart of the number of rows per conference.
ggplot(frequency_table_conference, aes(x = Var1, y = Freq)) +
  geom_col(fill = "black") +
  ggtitle("Barplot for Bar Plot of Conference Categories") +
  xlab("Categories") +
  ylab("Frequency") +
  theme_classic() +
  theme(axis.text.x = element_text(hjust = 1, size = 8))
School
# Count the rows per school and store the counts as a data frame with
# columns Var1 (category) and Freq (count), then print it.
frequency_table_school <- data.frame(table(data$school))
frequency_table_school
Var1 Freq
1 Arizona State 41
2 Arkansas 6
3 Auburn 36
4 Clemson 28
5 Colorado University 59
6 Duke 38
7 Florida 82
8 Georgia 36
9 Georgia Tech 33
10 Illinois 28
11 Iowa 20
12 Kentucky 45
13 LSU 37
14 Maryland 29
15 Michigan 147
16 Michigan State 21
17 Minnesota 42
18 NC State 31
19 Ohio State 123
20 Oklahoma 24
21 Penn State 84
22 Purdue 55
23 Stanford 44
24 Tennessee 123
25 UCLA 14
26 UNC 100
27 UoTexas 54
28 USC 23
29 Washington 32
30 Washington State 42
31 Wisconsin 55
# Bar chart of the number of rows per school; labels are rotated 90 degrees
# so the school names fit on the axis.
ggplot(frequency_table_school, aes(x = Var1, y = Freq)) +
  geom_col(fill = "black") +
  ggtitle("Barplot for Bar Plot of School Categories") +
  xlab("Categories") +
  ylab("Frequency") +
  theme_classic() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, size = 8))
Date
# Count the rows per distinct Date value and print the counts.
# Date is stored as text, so differently written dates (for example
# "9/10/2016" and "Sept. 10, 2016") are counted as separate categories.
frequency_table_Date <- data.frame(table(data$Date))
frequency_table_Date
Var1 Freq
1 10/1/2011 2
2 10/1/2016 6
3 10/1/2022 3
4 10/10/2009 3
5 10/10/2015 4
6 10/10/2020 2
7 10/10/2021 1
8 10/10/2022 1
9 10/11/2003 1
10 10/11/2008 1
11 10/11/2012 1
12 10/11/2014 4
13 10/11/2017 2
14 10/12/2013 1
15 10/12/2019 7
16 10/12/2024 1
17 10/13/2007 1
18 10/13/2018 3
19 10/14/2006 2
20 10/14/2017 8
21 10/14/2023 1
22 10/15/2015 1
23 10/15/2016 7
24 10/15/2021 1
25 10/15/2022 3
26 10/16/2004 1
27 10/16/2010 1
28 10/16/2021 1
29 10/17/2009 1
30 10/17/2015 5
31 10/17/2016 1
32 10/18/2008 1
33 10/18/2014 4
34 10/18/2018 2
35 10/19/2013 1
36 10/19/2019 4
37 10/19/2024 2
38 10/2/2010 1
39 10/20/2007 3
40 10/20/2012 4
41 10/20/2018 7
42 10/21/2006 2
43 10/21/2017 2
44 10/21/2023 1
45 10/22/2005 1
46 10/22/2011 1
47 10/22/2016 5
48 10/22/2022 1
49 10/22/2023 1
50 10/23/2010 3
51 10/23/2021 1
52 10/24/2009 2
53 10/24/2015 5
54 10/24/2020 3
55 10/25/2003 1
56 10/25/2008 3
57 10/25/2014 4
58 10/26/2013 3
59 10/26/2018 2
60 10/26/2019 4
61 10/26/2024 3
62 10/27/2007 2
63 10/27/2012 1
64 10/27/2018 4
65 10/28/2006 1
66 10/28/2017 4
67 10/28/2023 2
68 10/29/2011 2
69 10/29/2016 2
70 10/29/2022 3
71 10/3/2009 1
72 10/3/2015 10
73 10/3/2020 2
74 10/30/2021 1
75 10/31/2009 4
76 10/31/2013 1
77 10/31/2015 2
78 10/31/2021 1
79 10/4/2008 1
80 10/4/2014 4
81 10/5/2013 3
82 10/5/2019 5
83 10/5/2021 1
84 10/5/2024 2
85 10/6/2007 2
86 10/6/2012 1
87 10/6/2018 10
88 10/7/2017 7
89 10/7/2022 1
90 10/7/2023 2
91 10/8/2015 2
92 10/8/2016 3
93 10/8/2023 1
94 10/9/2004 1
95 10/9/2010 1
96 10/9/2021 1
97 10/9/2022 1
98 11/1/2003 1
99 11/1/2008 1
100 11/1/2014 3
101 11/10/2007 3
102 11/10/2012 3
103 11/10/2016 1
104 11/10/2018 10
105 11/11/2006 2
106 11/11/2017 3
107 11/11/2022 1
108 11/11/2023 1
109 11/12/2011 1
110 11/12/2016 8
111 11/12/2022 4
112 11/12/2023 1
113 11/13/2004 1
114 11/13/2010 2
115 11/13/2015 1
116 11/13/2021 2
117 11/13/2022 1
118 11/14/2009 3
119 11/14/2015 6
120 11/14/2020 2
121 11/14/2022 1
122 11/15/2008 1
123 11/15/2014 4
124 11/16/2013 2
125 11/16/2019 2
126 11/16/2024 1
127 11/17/2007 3
128 11/17/2012 1
129 11/17/2018 9
130 11/18/2006 1
131 11/18/2017 9
132 11/18/2018 2
133 11/18/2023 1
134 11/19/2005 1
135 11/19/2011 1
136 11/19/2016 7
137 11/19/2022 2
138 11/2/2019 4
139 11/2/2024 2
140 11/20/2010 1
141 11/20/2021 1
142 11/21/2009 2
143 11/21/2015 7
144 11/21/2020 1
145 11/21/2022 1
146 11/22/2003 1
147 11/22/2008 2
148 11/22/2014 6
149 11/23/2007 1
150 11/23/2012 1
151 11/23/2013 5
152 11/23/2018 2
153 11/23/2019 6
154 11/23/2024 4
155 11/24/2007 1
156 11/24/2012 4
157 11/24/2017 1
158 11/24/2018 4
159 11/24/2023 1
160 11/25/2006 1
161 11/25/2016 3
162 11/25/2017 8
163 11/25/2022 1
164 11/25/2023 3
165 11/26/2011 1
166 11/26/2016 4
167 11/26/2021 1
168 11/26/2022 1
169 11/27/2009 1
170 11/27/2010 1
171 11/27/2020 1
172 11/27/2021 1
173 11/28/2015 9
174 11/28/2021 1
175 11/29/2008 1
176 11/29/2014 3
177 11/3/2007 2
178 11/3/2012 4
179 11/3/2016 1
180 11/3/2018 9
181 11/30/2013 2
182 11/30/2019 7
183 11/30/2024 1
184 11/4/2006 1
185 11/4/2011 1
186 11/4/2017 7
187 11/4/2022 1
188 11/4/2023 3
189 11/5/2005 1
190 11/5/2011 1
191 11/5/2016 6
192 11/6/2004 1
193 11/6/2021 1
194 11/6/2022 1
195 11/7/2009 2
196 11/7/2015 11
197 11/7/2020 2
198 11/8/2008 2
199 11/8/2014 1
200 11/9/2013 1
201 11/9/2019 8
202 11/9/2024 2
203 12/18/2020 1
204 12/21/2024 2
205 12/23/2019 1
206 12/5/2020 1
207 12/6/2014 1
208 12/6/2020 1
209 2/9/2016 1
210 5/11/2016 1
211 8/10/2016 1
212 8/17/2016 1
213 8/29/2019 2
214 8/29/2021 1
215 8/29/2022 1
216 8/29/2024 2
217 8/30/2008 2
218 8/30/2014 3
219 8/31/2013 4
220 8/31/2017 1
221 8/31/2018 2
222 8/31/2019 7
223 8/31/2023 1
224 8/31/2024 3
225 9/1/2007 1
226 9/1/2016 2
227 9/1/2018 12
228 9/1/2022 1
229 9/10/2005 1
230 9/10/2011 2
231 9/10/2016 8
232 9/10/2022 4
233 9/11/2010 1
234 9/11/2016 1
235 9/11/2021 2
236 9/11/2022 1
237 9/12/2009 3
238 9/12/2015 9
239 9/12/2020 1
240 9/13/2008 2
241 9/13/2014 6
242 9/14/2013 2
243 9/14/2019 6
244 9/14/2024 4
245 9/15/2007 2
246 9/15/2012 3
247 9/15/2018 17
248 9/16/2006 2
249 9/16/2014 2
250 9/16/2016 1
251 9/16/2017 6
252 9/16/2022 1
253 9/16/2023 3
254 9/17/2005 1
255 9/17/2016 7
256 9/17/2022 4
257 9/17/2023 1
258 9/18/2004 1
259 9/18/2010 2
260 9/18/2021 4
261 9/18/2022 1
262 9/19/2009 2
263 9/19/2015 9
264 9/2/2006 3
265 9/2/2010 1
266 9/2/2015 1
267 9/2/2017 4
268 9/2/2023 2
269 9/20/2003 1
270 9/20/2008 2
271 9/20/2014 2
272 9/20/2017 2
273 9/20/2019 1
274 9/21/2013 4
275 9/21/2019 7
276 9/21/2024 4
277 9/22/2007 3
278 9/22/2012 4
279 9/22/2018 8
280 9/23/2006 3
281 9/23/2017 10
282 9/23/2022 2
283 9/23/2023 1
284 9/24/2005 1
285 9/24/2011 1
286 9/24/2016 5
287 9/24/2022 2
288 9/24/2023 1
289 9/25/2010 1
290 9/25/2021 3
291 9/26/2009 2
292 9/26/2015 6
293 9/26/2022 1
294 9/27/2003 1
295 9/27/2008 2
296 9/27/2014 1
297 9/28/2013 1
298 9/28/2018 1
299 9/28/2019 3
300 9/28/2024 2
301 9/29/2007 1
302 9/29/2012 1
303 9/29/2017 1
304 9/29/2018 7
305 9/3/2011 1
306 9/3/2016 7
307 9/3/2022 3
308 9/3/2023 1
309 9/30/2006 1
310 9/30/2017 6
311 9/30/2023 2
312 9/4/2016 1
313 9/4/2021 2
314 9/5/2004 1
315 9/5/2009 2
316 9/5/2015 8
317 9/5/2021 1
318 9/6/2003 1
319 9/6/2008 1
320 9/6/2009 1
321 9/6/2014 3
322 9/6/2019 2
323 9/7/2007 1
324 9/7/2013 3
325 9/7/2019 12
326 9/7/2024 4
327 9/8/2007 3
328 9/8/2012 5
329 9/8/2018 12
330 9/9/2006 2
331 9/9/2017 7
332 9/9/2022 2
333 9/9/2023 3
334 Aug. 29 1
335 aug. 30 2
336 Aug. 30 1
337 aug. 31 1
338 dec. 1 1
339 nov. 10 4
340 nov. 11 2
341 Nov. 11 1
342 nov. 12 2
343 Nov. 12, 2016 1
344 nov. 13 1
345 nov. 14 3
346 nov. 15 1
347 Nov. 15 1
348 nov. 16 3
349 Nov. 16 1
350 nov. 17 2
351 Nov. 17 1
352 nov. 18 3
353 Nov. 18 1
354 nov. 19 4
355 nov. 2 1
356 Nov. 2 1
357 nov. 20 1
358 Nov. 20 1
359 nov. 21 2
360 nov. 22 1
361 nov. 23 1
362 nov. 24 4
363 Nov. 24 1
364 nov. 25 3
365 nov. 26 1
366 Nov. 26, 2016 1
367 nov. 28 2
368 nov. 29 1
369 Nov. 29 1
370 nov. 3 3
371 Nov. 3 1
372 nov. 30 4
373 nov. 4 1
374 nov. 5 4
375 Nov. 5, 2016 1
376 nov. 6 1
377 nov. 7 2
378 nov. 8 2
379 nov. 9 3
380 oct. 1 3
381 Oct. 1 1
382 Oct. 1, 2016 1
383 oct. 10 2
384 oct. 11 1
385 oct. 12 1
386 Oct. 12 1
387 oct. 13 3
388 oct. 14 3
389 oct. 15 2
390 Oct. 15, 2016 1
391 oct. 17 3
392 oct. 18 1
393 oct. 2 1
394 oct. 20 1
395 Oct. 20 1
396 oct. 21 2
397 oct. 22 2
398 oct. 24 2
399 oct. 25 1
400 oct. 26 1
401 Oct. 26 1
402 oct. 27 1
403 oct. 28 2
404 Oct. 28 1
405 oct. 29 3
406 oct. 3 4
407 oct. 30 2
408 oct. 31 2
409 oct. 4 2
410 oct. 5 3
411 oct. 6 3
412 oct. 7 1
413 Oct. 7 1
414 oct. 8 4
415 oct. 9 2
416 Oct.18 1
417 sept 25. 1
418 sept. 1 3
419 Sept. 1 1
420 sept. 10 3
421 Sept. 10, 2016 1
422 sept. 12 4
423 Sept. 13 1
424 sept. 14 1
425 sept. 15 1
426 Sept. 15 1
427 sept. 16 3
428 sept. 17 5
429 Sept. 17, 2016 1
430 sept. 18 2
431 sept. 19 3
432 sept. 2 3
433 Sept. 2 1
434 sept. 20 1
435 Sept. 20 1
436 sept. 21 2
437 Sept. 21 1
438 sept. 22 5
439 sept. 23 2
440 Sept. 23 1
441 sept. 24 1
442 sept. 26 2
443 sept. 28 2
444 sept. 29 5
445 Sept. 29 1
446 sept. 3 3
447 sept. 30 2
448 sept. 4 1
449 sept. 5 2
450 sept. 6 1
451 sept. 7 5
452 Sept. 7 1
453 sept. 8 1
454 sept. 9 5
455 Sept. 9 1
456 sept.1 1
# Bar chart of the number of rows per distinct Date value. The axis text is
# drawn at size 1 with vertical labels because there are several hundred
# categories.
ggplot(frequency_table_Date, aes(x = Var1, y = Freq)) +
  geom_col(fill = "black") +
  ggtitle("Barplot for Bar Plot of Date Categories") +
  xlab("Categories") +
  ylab("Frequency") +
  theme_classic() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, size = 1))
Time Central 0
# Count the rows per time_0 level and print the counts.
frequency_table_time_0 <- data.frame(table(data$time_0))
frequency_table_time_0
Var1 Freq
1 0 462
2 1 392
3 2 371
# Bar chart of the number of rows per time_0 level.
ggplot(frequency_table_time_0, aes(x = Var1, y = Freq)) +
  geom_col(fill = "black") +
  ggtitle("Barplot for Bar Plot of Time Central 0 Categories") +
  xlab("Categories") +
  ylab("Frequency") +
  theme_classic() +
  theme(axis.text.x = element_text(hjust = 1, size = 8))
Game Result
# Count the rows per game_result level and print the counts.
frequency_table_game_result <- data.frame(table(data$game_result))
frequency_table_game_result
Var1 Freq
1 0.0 384
2 1.0 850
3 N/A 1
# Bar chart of the number of rows per game_result level.
ggplot(frequency_table_game_result, aes(x = Var1, y = Freq)) +
  geom_col(fill = "black") +
  ggtitle("Barplot for Bar Plot of Game Result Categories") +
  xlab("Categories") +
  ylab("Frequency") +
  theme_classic() +
  theme(axis.text.x = element_text(hjust = 1, size = 8))
Game time
# Count the rows per distinct game_time value and print the counts.
frequency_table_game_time <- data.frame(table(data$game_time))
frequency_table_game_time
Var1 Freq
1 1899-12-31 09:00:00 1
2 1899-12-31 10:00:00 2
3 1899-12-31 11:00:00 60
4 1899-12-31 11:05:00 2
5 1899-12-31 11:30:00 7
6 1899-12-31 12:00:00 301
7 1899-12-31 12:05:00 1
8 1899-12-31 12:10:00 2
9 1899-12-31 12:15:00 1
10 1899-12-31 12:20:00 17
11 1899-12-31 12:30:00 71
12 1899-12-31 12:45:00 1
13 1899-12-31 13:00:00 17
14 1899-12-31 13:30:00 16
15 1899-12-31 13:45:00 1
16 1899-12-31 14:00:00 8
17 1899-12-31 14:30:00 51
18 1899-12-31 14:35:00 3
19 1899-12-31 14:40:00 1
20 1899-12-31 15:00:00 24
21 1899-12-31 15:15:00 1
22 1899-12-31 15:30:00 213
23 1899-12-31 15:45:00 3
24 1899-12-31 16:00:00 46
25 1899-12-31 16:05:00 1
26 1899-12-31 16:20:00 1
27 1899-12-31 16:30:00 10
28 1899-12-31 17:00:00 21
29 1899-12-31 17:30:00 8
30 1899-12-31 17:40:00 1
31 1899-12-31 17:51:00 1
32 1899-12-31 18:00:00 54
33 1899-12-31 18:02:00 1
34 1899-12-31 18:05:00 1
35 1899-12-31 18:06:00 1
36 1899-12-31 18:15:00 1
37 1899-12-31 18:30:00 18
38 1899-12-31 18:40:00 1
39 1899-12-31 18:45:00 1
40 1899-12-31 18:50:00 2
41 1899-12-31 19:00:00 97
42 1899-12-31 19:14:00 1
43 1899-12-31 19:15:00 3
44 1899-12-31 19:30:00 105
45 1899-12-31 19:35:00 1
46 1899-12-31 19:45:00 9
47 1899-12-31 20:00:00 47
48 1899-12-31 20:10:00 1
49 1899-12-31 20:20:00 2
50 1899-12-31 20:30:00 1
# Bar chart of the number of rows per distinct game_time value, with small
# vertical axis labels so all time stamps fit.
ggplot(frequency_table_game_time, aes(x = Var1, y = Freq)) +
  geom_col(fill = "black") +
  ggtitle("Barplot for Bar Plot of Game time Categories") +
  xlab("Categories") +
  ylab("Frequency") +
  theme_classic() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, size = 5))
Numerical Variables
Year
# Histogram of year, one bin per year.
ggplot(data, aes(x = year)) +
  geom_histogram(binwidth = 1, fill = "black", colour = "black") +
  ggtitle("Histogram of Year") +
  xlab("Variable Value") +
  ylab("Frequency") +
  theme_classic()
Tenure Year
# Histogram of tenure_year, one bin per unit.
ggplot(data, aes(x = tenure_year)) +
  geom_histogram(binwidth = 1, fill = "black", colour = "black") +
  ggtitle("Histogram of Tenure Year") +
  xlab("Variable Value") +
  ylab("Frequency") +
  theme_classic()
Season Game
# Histogram of S_Game (season game number), one bin per game.
# The x-axis window is set with coord_cartesian() instead of scale limits:
# scale limits discard bars that touch the edge of the range (the source of
# the "Removed 2 rows ... (geom_bar())" warning), whereas coord_cartesian()
# only zooms the view. Rows with a missing S_Game are still dropped by
# stat_bin().
ggplot(data = data, aes(x = S_Game)) +
  geom_histogram(binwidth = 1, fill = "black", color = "black") +
  labs(
    title = "Histogram of Season Game",
    x = "Variable Value",
    y = "Frequency"
  ) +
  theme_classic() +
  coord_cartesian(xlim = c(0, 10))
Warning: Removed 136 rows containing non-finite outside the scale range
(`stat_bin()`).
Warning: Removed 2 rows containing missing values or values outside the scale range
(`geom_bar()`).
Stadium Waste Diversion
# Histogram of s_diversion (stadium waste diversion).
# The data summary shows s_diversion ranging from 0 to about 0.99, so a bin
# width of 1 squeezes every observation into one or two bars. A width of
# 0.05 with bins starting at 0 gives about 20 bins across the observed range.
ggplot(data = data, aes(x = s_diversion)) +
  geom_histogram(
    binwidth = 0.05,
    boundary = 0,
    fill = "black",
    color = "black"
  ) +
  labs(
    title = "Histogram of Stadium Waste Diversion",
    x = "Variable Value",
    y = "Frequency"
  ) +
  theme_classic()
Warning: Removed 19 rows containing non-finite outside the scale range
(`stat_bin()`).
Attendance
# Histogram of attendance.
# The data summary shows attendance ranging from about 1,275 to 115,109, so a
# bin width of 1 creates over a hundred thousand near-empty bins: slow to
# draw and unreadable. Bins of 5,000 starting at 0 show the distribution.
ggplot(data = data, aes(x = attendance)) +
  geom_histogram(
    binwidth = 5000,
    boundary = 0,
    fill = "black",
    color = "black"
  ) +
  labs(
    title = "Histogram of Attendance",
    x = "Variable Value",
    y = "Frequency"
  ) +
  theme_classic()
Warning: Removed 2 rows containing non-finite outside the scale range
(`stat_bin()`).
Conference Code
# Histogram of confCode (conference code), one bin per code.
# The title previously read "Histogram of Attendance", copied from the
# attendance plot; it now names the variable actually plotted.
ggplot(data = data, aes(x = confCode)) +
  geom_histogram(binwidth = 1, fill = "black", color = "black") +
  labs(
    title = "Histogram of Conference Code",
    x = "Variable Value",
    y = "Frequency"
  ) +
  theme_classic()
Warning: Removed 307 rows containing non-finite outside the scale range
(`stat_bin()`).
School ID
# Histogram of school_ID, one bin per ID value.
ggplot(data, aes(x = school_ID)) +
  geom_histogram(binwidth = 1, fill = "black", colour = "black") +
  ggtitle("Histogram of School ID") +
  xlab("Variable Value") +
  ylab("Frequency") +
  theme_classic()
Warning: Removed 307 rows containing non-finite outside the scale range
(`stat_bin()`).
Year Central 0
# Histogram of year_0, one bin per unit.
ggplot(data, aes(x = year_0)) +
  geom_histogram(binwidth = 1, fill = "black", colour = "black") +
  ggtitle("Histogram of Year Central 0") +
  xlab("Variable Value") +
  ylab("Frequency") +
  theme_classic()
Warning: Removed 307 rows containing non-finite outside the scale range
(`stat_bin()`).
Tenure Year Central 0
# Histogram of tenure_0, one bin per unit.
ggplot(data, aes(x = tenure_0)) +
  geom_histogram(binwidth = 1, fill = "black", colour = "black") +
  ggtitle("Histogram of Tenure Year Central 0") +
  xlab("Variable Value") +
  ylab("Frequency") +
  theme_classic()
Warning: Removed 307 rows containing non-finite outside the scale range
(`stat_bin()`).
Game Number 1
# Histogram of game_number, one bin per unit.
ggplot(data, aes(x = game_number)) +
  geom_histogram(binwidth = 1, fill = "black", colour = "black") +
  ggtitle("Histogram of Game Number 1") +
  xlab("Variable Value") +
  ylab("Frequency") +
  theme_classic()
Warning: Removed 292 rows containing non-finite outside the scale range
(`stat_bin()`).
Game Number 2
# Histogram of game_number2, one bin per unit.
ggplot(data, aes(x = game_number2)) +
  geom_histogram(binwidth = 1, fill = "black", colour = "black") +
  ggtitle("Histogram of Game Number 2") +
  xlab("Variable Value") +
  ylab("Frequency") +
  theme_classic()
Warning: Removed 292 rows containing non-finite outside the scale range
(`stat_bin()`).
Game central 0
# Histogram of game_0, one bin per unit.
ggplot(data, aes(x = game_0)) +
  geom_histogram(binwidth = 1, fill = "black", colour = "black") +
  ggtitle("Histogram of Game central 0") +
  xlab("Variable Value") +
  ylab("Frequency") +
  theme_classic()
Warning: Removed 307 rows containing non-finite outside the scale range
(`stat_bin()`).
Game Minutes
# Histogram of game_min with a bin width of 1.
ggplot(data, aes(x = game_min)) +
  geom_histogram(binwidth = 1, fill = "black", colour = "black") +
  ggtitle("Histogram of Game Minutes") +
  xlab("Variable Value") +
  ylab("Frequency") +
  theme_classic()
Warning: Removed 292 rows containing non-finite outside the scale range
(`stat_bin()`).
Game time hours 0
# Histogram of game_time_hr_0 with a bin width of 1.
ggplot(data, aes(x = game_time_hr_0)) +
  geom_histogram(binwidth = 1, fill = "black", colour = "black") +
  ggtitle("Histogram of Game time hours 0") +
  xlab("Variable Value") +
  ylab("Frequency") +
  theme_classic()
Warning: Removed 292 rows containing non-finite outside the scale range
(`stat_bin()`).