# A tibble: 15 × 19
conference confCode school school_ID year year_0 tenure_year tenure_0
<chr> <dbl> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
1 Pac12 NA USC NA 2020 NA 6 NA
2 Pac12 NA USC NA 2020 NA 6 NA
3 Pac12 NA USC NA 2020 NA 6 NA
4 SEC NA Georgia NA 2020 NA 6 NA
5 SEC NA Georgia NA 2020 NA 6 NA
6 SEC NA Georgia NA 2020 NA 6 NA
7 SEC NA Kentucky NA 2020 NA 5 NA
8 SEC NA Kentucky NA 2020 NA 5 NA
9 SEC NA Kentucky NA 2020 NA 5 NA
10 SEC NA Kentucky NA 2020 NA 5 NA
11 SEC NA Kentucky NA 2020 NA 5 NA
12 Big10 NA Penn State NA 2021 NA 13 NA
13 Big10 NA Penn State NA 2021 NA 13 NA
14 Big10 NA Penn State NA 2021 NA 13 NA
15 Big10 NA Penn State NA 2021 NA 13 NA
# ℹ 11 more variables: game_number <dbl>, game_number2 <dbl>, game_0 <dbl>,
# s_diversion <dbl>, attendance <dbl>, game_time <chr>, game_min <dbl>,
# game_time_hr_0 <dbl>, time_0 <chr>, game_result <chr>, Date <chr>
# A tibble: 15 × 19
conference confCode school school_ID year year_0 tenure_year tenure_0
<chr> <dbl> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
1 Pac12 3 USC 27 2020 NA 6 NA
2 Pac12 3 USC 27 2020 NA 6 NA
3 Pac12 3 USC 27 2020 NA 6 NA
4 SEC 4 Georgia 8 2020 NA 6 NA
5 SEC 4 Georgia 8 2020 NA 6 NA
6 SEC 4 Georgia 8 2020 NA 6 NA
7 SEC 4 Kentucky 11 2020 NA 5 NA
8 SEC 4 Kentucky 11 2020 NA 5 NA
9 SEC 4 Kentucky 11 2020 NA 5 NA
10 SEC 4 Kentucky 11 2020 NA 5 NA
11 SEC 4 Kentucky 11 2020 NA 5 NA
12 Big10 1 Penn State 20 2021 NA 13 NA
13 Big10 1 Penn State 20 2021 NA 13 NA
14 Big10 1 Penn State 20 2021 NA 13 NA
15 Big10 1 Penn State 20 2021 NA 13 NA
# ℹ 11 more variables: game_number <dbl>, game_number2 <dbl>, game_0 <dbl>,
# s_diversion <dbl>, attendance <dbl>, game_time <chr>, game_min <dbl>,
# game_time_hr_0 <dbl>, time_0 <chr>, game_result <chr>, Date <chr>
# List every row of the cleaned data whose tenure_0 value is missing.
Collected_Data_clean %>%
  filter(is.na(tenure_0))
# A tibble: 15 × 19
conference confCode school school_ID year year_0 tenure_year tenure_0
<chr> <dbl> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
1 Pac12 3 USC 27 2020 NA 6 NA
2 Pac12 3 USC 27 2020 NA 6 NA
3 Pac12 3 USC 27 2020 NA 6 NA
4 SEC 4 Georgia 8 2020 NA 6 NA
5 SEC 4 Georgia 8 2020 NA 6 NA
6 SEC 4 Georgia 8 2020 NA 6 NA
7 SEC 4 Kentucky 11 2020 NA 5 NA
8 SEC 4 Kentucky 11 2020 NA 5 NA
9 SEC 4 Kentucky 11 2020 NA 5 NA
10 SEC 4 Kentucky 11 2020 NA 5 NA
11 SEC 4 Kentucky 11 2020 NA 5 NA
12 Big10 1 Penn State 20 2021 NA 13 NA
13 Big10 1 Penn State 20 2021 NA 13 NA
14 Big10 1 Penn State 20 2021 NA 13 NA
15 Big10 1 Penn State 20 2021 NA 13 NA
# ℹ 11 more variables: game_number <dbl>, game_number2 <dbl>, game_0 <dbl>,
# s_diversion <dbl>, attendance <dbl>, game_time <chr>, game_min <dbl>,
# game_time_hr_0 <dbl>, time_0 <chr>, game_result <chr>, Date <chr>
# Rows of the cleaned data with year 2020.
# `year` is a <dbl> column, so it is compared with a number; comparing with the
# string "2020" only works through implicit numeric-to-character coercion.
Collected_Data_clean %>%
  filter(year == 2020)
# A tibble: 11 × 19
conference confCode school school_ID year year_0 tenure_year tenure_0
<chr> <dbl> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
1 Pac12 3 USC 27 2020 NA 6 NA
2 Pac12 3 USC 27 2020 NA 6 NA
3 Pac12 3 USC 27 2020 NA 6 NA
4 SEC 4 Georgia 8 2020 NA 6 NA
5 SEC 4 Georgia 8 2020 NA 6 NA
6 SEC 4 Georgia 8 2020 NA 6 NA
7 SEC 4 Kentucky 11 2020 NA 5 NA
8 SEC 4 Kentucky 11 2020 NA 5 NA
9 SEC 4 Kentucky 11 2020 NA 5 NA
10 SEC 4 Kentucky 11 2020 NA 5 NA
11 SEC 4 Kentucky 11 2020 NA 5 NA
# ℹ 11 more variables: game_number <dbl>, game_number2 <dbl>, game_0 <dbl>,
# s_diversion <dbl>, attendance <dbl>, game_time <chr>, game_min <dbl>,
# game_time_hr_0 <dbl>, time_0 <chr>, game_result <chr>, Date <chr>
# Rows of the cleaned data with year 2021.
# `year` is a <dbl> column, so it is compared with a number; comparing with the
# string "2021" only works through implicit numeric-to-character coercion.
Collected_Data_clean %>%
  filter(year == 2021)
# A tibble: 4 × 19
conference confCode school school_ID year year_0 tenure_year tenure_0
<chr> <dbl> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
1 Big10 1 Penn State 20 2021 NA 13 NA
2 Big10 1 Penn State 20 2021 NA 13 NA
3 Big10 1 Penn State 20 2021 NA 13 NA
4 Big10 1 Penn State 20 2021 NA 13 NA
# ℹ 11 more variables: game_number <dbl>, game_number2 <dbl>, game_0 <dbl>,
# s_diversion <dbl>, attendance <dbl>, game_time <chr>, game_min <dbl>,
# game_time_hr_0 <dbl>, time_0 <chr>, game_result <chr>, Date <chr>
Create final dataset
# Build the final dataset: a right join keeps every row of Collected_Data_clean
# and attaches the matching columns of Collected_Data_old_renamed.
# NOTE(review): s_diversion and attendance are <dbl> join keys; exact equality on
# doubles can miss matches if the two sources differ by rounding -- confirm.
data <- right_join(
  x = Collected_Data_old_renamed,
  y = Collected_Data_clean,
  by = c(
    "conference", "school", "year", "tenure_year",
    "s_diversion", "attendance", "Date"
  )
)
Var1 Freq
1 Arizona State 33
2 Arkansas 6
3 Auburn 36
4 Clemson 28
5 Colorado University 59
6 Duke 38
7 Florida 43
8 Georgia 36
9 Georgia Tech 33
10 Iowa 20
11 Kentucky 34
12 LSU 37
13 Maryland 29
14 Michigan 113
15 Michigan State 21
16 Minnesota 21
17 NC State 31
18 Ohio State 91
19 Oklahoma 24
20 Penn State 82
21 Purdue 55
22 Stanford 32
23 Tennessee 77
24 UCLA 7
25 UNC 100
26 UoTexas 9
27 USC 16
28 Washington 32
29 Washington State 42
30 Wisconsin 55
# Bar charts for the categorical variables. Each plot reads a pre-computed
# frequency table with one row per category (Var1) and its count (Freq).

# School: labels rotated 90 degrees so the school names stay readable.
ggplot(data = frequency_table_school, mapping = aes(x = Var1, y = Freq)) +
  geom_bar(stat = "identity", fill = "black") +
  theme_classic() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, size = 8)) +
  labs(
    title = "Barplot for Bar Plot of School Categories",
    x = "Categories",
    y = "Frequency"
  )

# Date: axis text is set to size 1, presumably because there are many dates.
ggplot(data = frequency_table_Date, mapping = aes(x = Var1, y = Freq)) +
  geom_bar(stat = "identity", fill = "black") +
  theme_classic() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, size = 1)) +
  labs(
    title = "Barplot for Bar Plot of Date Categories",
    x = "Categories",
    y = "Frequency"
  )

# time_0: horizontal axis labels.
ggplot(data = frequency_table_time_0, mapping = aes(x = Var1, y = Freq)) +
  geom_bar(stat = "identity", fill = "black") +
  theme_classic() +
  theme(axis.text.x = element_text(hjust = 1, size = 8)) +
  labs(
    title = "Barplot for Bar Plot of Time Central 0 Categories",
    x = "Categories",
    y = "Frequency"
  )

# game_result: horizontal axis labels.
ggplot(data = frequency_table_game_result, mapping = aes(x = Var1, y = Freq)) +
  geom_bar(stat = "identity", fill = "black") +
  theme_classic() +
  theme(axis.text.x = element_text(hjust = 1, size = 8)) +
  labs(
    title = "Barplot for Bar Plot of Game Result Categories",
    x = "Categories",
    y = "Frequency"
  )

# game_time: rotated, smaller axis labels.
ggplot(data = frequency_table_game_time, mapping = aes(x = Var1, y = Freq)) +
  geom_bar(stat = "identity", fill = "black") +
  theme_classic() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, size = 5)) +
  labs(
    title = "Barplot for Bar Plot of Game time Categories",
    x = "Categories",
    y = "Frequency"
  )
Numerical Variables
Year
# Histogram of `year` in the joined data, using bins of width 1.
ggplot(data = data, mapping = aes(x = year)) +
  geom_histogram(binwidth = 1, fill = "black", color = "black") +
  theme_classic() +
  labs(
    title = "Histogram of Year",
    x = "Variable Value",
    y = "Frequency"
  )

# Bar chart of the pre-computed tenure-year frequency table (Var1 / Freq).
ggplot(data = frequency_table_tenure_year, mapping = aes(x = Var1, y = Freq)) +
  geom_bar(stat = "identity", fill = "black") +
  theme_classic() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, size = 8)) +
  labs(
    title = "Barplot for Bar Plot of Tenure Year",
    x = "Categories",
    y = "Frequency"
  )
Season Game
# Base-graphics histogram of S_Game.
hist(
  data$S_Game,
  main = "Histogram of Season Game",
  xlab = "Variable Value",
  ylab = "Frequency"
)

# ggplot2 histogram of S_Game with the x axis limited to 1-10.
# With the default out-of-bounds handling (censor), the two outermost bars
# extend past limits = c(1, 10) and are silently dropped, which is what the
# "Removed 2 rows ... (`geom_bar()`)" warning reports. oob_keep leaves the
# computed bars untouched while the limits still define the binning range and
# the visible axis.
ggplot(data = data, mapping = aes(x = S_Game)) +
  geom_histogram(binwidth = 1, fill = "black", color = "black") +
  labs(
    title = "Histogram of Season Game",
    x = "Variable Value",
    y = "Frequency"
  ) +
  theme_classic() +
  scale_x_continuous(
    limits = c(1, 10),
    breaks = seq(1, 10),
    oob = scales::oob_keep
  )
Warning: Removed 136 rows containing non-finite outside the scale range
(`stat_bin()`).
Warning: Removed 2 rows containing missing values or values outside the scale range
(`geom_bar()`).
# Histogram of s_diversion with the x axis limited to [0, 1].
# bins = 30 is the value ggplot2 was already choosing implicitly; stating it
# keeps the plot the same and removes the "Pick better value" message.
# With the default out-of-bounds handling (censor), the two outermost bars
# extend past the limits and are silently dropped ("Removed 2 rows ...
# (`geom_bar()`)"). oob_keep keeps those bars while the limits still define the
# binning range and the visible axis.
ggplot(data = data, mapping = aes(x = s_diversion)) +
  geom_histogram(bins = 30, fill = "black", color = "black") +
  labs(
    title = "Histogram of Stadium Waste Diversion",
    x = "Variable Value",
    y = "Frequency"
  ) +
  theme_classic() +
  scale_x_continuous(limits = c(0, 1), oob = scales::oob_keep)
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Warning: Removed 5 rows containing non-finite outside the scale range
(`stat_bin()`).
Warning: Removed 2 rows containing missing values or values outside the scale range
(`geom_bar()`).
Attendance
# Base-graphics histogram of attendance.
hist(
  data$attendance,
  main = "Histogram of Attendance",
  xlab = "Variable Value",
  ylab = "Frequency"
)

# ggplot2 histogram of attendance.
# bins = 30 is the value ggplot2 was already choosing implicitly; stating it
# keeps the plot the same and removes the "Pick better value" message.
ggplot(data = data, mapping = aes(x = attendance)) +
  geom_histogram(bins = 30, fill = "black", color = "black") +
  labs(
    title = "Histogram of Attendance",
    x = "Variable Value",
    y = "Frequency"
  ) +
  theme_classic()
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Warning: Removed 2 rows containing non-finite outside the scale range
(`stat_bin()`).
Conference Code
# Histogram of confCode, using bins of width 1.
# The title previously read "Histogram of Attendance" (copy-paste leftover).
ggplot(data = data, mapping = aes(x = confCode)) +
  geom_histogram(binwidth = 1, fill = "black", color = "black") +
  labs(
    title = "Histogram of Conference Code",
    x = "Variable Value",
    y = "Frequency"
  ) +
  theme_classic()

# Bar chart of the pre-computed conference-code frequency table (Var1 / Freq).
# The title previously named "Tenure Year Central 0" (copy-paste leftover).
ggplot(
  data = frequency_table_conference_code,
  mapping = aes(x = Var1, y = Freq)
) +
  geom_bar(stat = "identity", fill = "black") +
  labs(
    title = "Barplot for Bar Plot of Conference Code",
    x = "Categories",
    y = "Frequency"
  ) +
  theme_classic() +
  theme(axis.text.x = element_text(hjust = 1, size = 8))
School ID
# Histogram of school_ID, using bins of width 1.
ggplot(data = data, mapping = aes(x = school_ID)) +
  geom_histogram(binwidth = 1, fill = "black", color = "black") +
  labs(
    title = "Histogram of School ID",
    x = "Variable Value",
    y = "Frequency"
  ) +
  theme_classic()

# Bar chart of the pre-computed school-ID frequency table (Var1 / Freq).
# The title previously named "Tenure Year Central 0" (copy-paste leftover).
ggplot(data = frequency_table_school_ID, mapping = aes(x = Var1, y = Freq)) +
  geom_bar(stat = "identity", fill = "black") +
  labs(
    title = "Barplot for Bar Plot of School ID",
    x = "Categories",
    y = "Frequency"
  ) +
  theme_classic() +
  theme(axis.text.x = element_text(hjust = 1, size = 8))
Year Central 0
# Histogram of year_0, using bins of width 1.
# Rows where year_0 is NA are dropped by stat_bin() with a warning.
ggplot(data = data, mapping = aes(x = year_0)) +
  geom_histogram(binwidth = 1, fill = "black", color = "black") +
  theme_classic() +
  labs(
    title = "Histogram of Year Central 0",
    x = "Variable Value",
    y = "Frequency"
  )
Warning: Removed 15 rows containing non-finite outside the scale range
(`stat_bin()`).
# Bar chart of the pre-computed year_0 frequency table (Var1 / Freq).
# The title previously named "Tenure Year Central 0" (copy-paste leftover).
ggplot(data = frequency_table_year_0, mapping = aes(x = Var1, y = Freq)) +
  geom_bar(stat = "identity", fill = "black") +
  labs(
    title = "Barplot for Bar Plot of Year Central 0",
    x = "Categories",
    y = "Frequency"
  ) +
  theme_classic() +
  theme(axis.text.x = element_text(hjust = 1, size = 8))
Tenure Year Central 0
# Histogram of tenure_0, using bins of width 1.
# Rows where tenure_0 is NA are dropped by stat_bin() with a warning.
ggplot(data = data, mapping = aes(x = tenure_0)) +
  geom_histogram(binwidth = 1, fill = "black", color = "black") +
  theme_classic() +
  labs(
    title = "Histogram of Tenure Year Central 0",
    x = "Variable Value",
    y = "Frequency"
  )
Warning: Removed 15 rows containing non-finite outside the scale range
(`stat_bin()`).
# Bar chart of the pre-computed tenure_year_0 frequency table (Var1 / Freq).
ggplot(
  data = frequency_table_tenure_year_0,
  mapping = aes(x = Var1, y = Freq)
) +
  geom_bar(stat = "identity", fill = "black") +
  theme_classic() +
  theme(axis.text.x = element_text(hjust = 1, size = 8)) +
  labs(
    title = "Barplot for Bar Plot of Tenure Year Central 0",
    x = "Categories",
    y = "Frequency"
  )
Game Number
# Base-graphics histogram of game_number.
hist(
  data$game_number,
  main = "Histogram of Game Number",
  xlab = "Variable Value",
  ylab = "Frequency"
)

# ggplot2 histogram of game_number with the x axis limited to 1-113.
# bins = 30 is the value ggplot2 was already choosing implicitly; stating it
# keeps the plot the same and removes the "Pick better value" message.
# With the default out-of-bounds handling (censor), the two outermost bars
# extend past the limits and are silently dropped ("Removed 2 rows ...
# (`geom_bar()`)"). oob_keep keeps those bars while the limits still define the
# binning range and the visible axis.
ggplot(data = data, mapping = aes(x = game_number)) +
  geom_histogram(bins = 30, fill = "black", color = "black") +
  labs(
    title = "Histogram of Game Number",
    x = "Variable Value",
    y = "Frequency"
  ) +
  theme_classic() +
  scale_x_continuous(limits = c(1, 113), oob = scales::oob_keep)
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Warning: Removed 2 rows containing missing values or values outside the scale range
(`geom_bar()`).
Game Number 2
# Base-graphics histogram of game_number2.
hist(
  data$game_number2,
  main = "Histogram of Game Number 2",
  xlab = "Variable Value",
  ylab = "Frequency"
)

# ggplot2 histogram of game_number2, using bins of width 1.
ggplot(data = data, mapping = aes(x = game_number2)) +
  geom_histogram(binwidth = 1, fill = "black", color = "black") +
  theme_classic() +
  labs(
    title = "Histogram of Game Number 2",
    x = "Variable Value",
    y = "Frequency"
  )
Game central 0
# Base-graphics histogram of game_0.
hist(
  data$game_0,
  main = "Histogram of Game central 0",
  xlab = "Variable Value",
  ylab = "Frequency"
)

# ggplot2 histogram of game_0, using bins of width 1.
# Rows where game_0 is NA are dropped by stat_bin() with a warning.
ggplot(data = data, mapping = aes(x = game_0)) +
  geom_histogram(binwidth = 1, fill = "black", color = "black") +
  theme_classic() +
  labs(
    title = "Histogram of Game central 0",
    x = "Variable Value",
    y = "Frequency"
  )
Warning: Removed 15 rows containing non-finite outside the scale range
(`stat_bin()`).
Game Minutes
# Base-graphics histogram of game_min.
hist(
  data$game_min,
  main = "Histogram of Game Minutes",
  xlab = "Variable Value",
  ylab = "Frequency"
)

# ggplot2 histogram of game_min.
# bins = 30 is the value ggplot2 was already choosing implicitly; stating it
# keeps the plot the same and removes the "Pick better value" message.
ggplot(data = data, mapping = aes(x = game_min)) +
  geom_histogram(bins = 30, fill = "black", color = "black") +
  labs(
    title = "Histogram of Game Minutes",
    x = "Variable Value",
    y = "Frequency"
  ) +
  theme_classic()
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Game time hours 0
# Base-graphics histogram of game_time_hr_0.
hist(
  data$game_time_hr_0,
  main = "Histogram of Game time hours 0",
  xlab = "Variable Value",
  ylab = "Frequency"
)

# ggplot2 histogram of game_time_hr_0.
# bins = 30 is the value ggplot2 was already choosing implicitly; stating it
# keeps the plot the same and removes the "Pick better value" message.
ggplot(data = data, mapping = aes(x = game_time_hr_0)) +
  geom_histogram(bins = 30, fill = "black", color = "black") +
  labs(
    title = "Histogram of Game time hours 0",
    x = "Variable Value",
    y = "Frequency"
  ) +
  theme_classic()
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.