Quantitatively evaluating CFB Coaches

The goal of this project was to create a quantitative way of ranking coaches. The idea was that coaches would be rating on three main categories which make up a composite score. The three categories are as follow…

Project Set-Up

Clustering Set-Up

In order to help determine what expectations should be at certain programs we create our own pseudo-conferences of programs that are roughly on the same tier. It’s not fool proof but it’s better than using conferences or subjective tiers.

teamInfo <- cfbd_team_info()
coachesDf <- cfbd_coaches() %>% filter(games > 3)
talent <- cfbd_team_talent()

fpi12 <- espn_ratings_fpi(year = 2012)
fpi13 <- espn_ratings_fpi(year = 2013)
fpi14 <- espn_ratings_fpi(year = 2014)
fpi15 <- espn_ratings_fpi(year = 2015)
fpi16 <- espn_ratings_fpi(year = 2016)
fpi17 <- espn_ratings_fpi(year = 2017)
fpi18 <- espn_ratings_fpi(year = 2018)
fpi19 <- espn_ratings_fpi(year = 2019)
fpi20 <- espn_ratings_fpi(year = 2020)
fpi21 <- espn_ratings_fpi(year = 2021)
    
fpi <- rbind(fpi17, fpi18, fpi19, fpi20, fpi21)

# Compiling Data
rec12 <- cfbd_recruiting_team(year = 2012)
rec13 <- cfbd_recruiting_team(year = 2013)
rec14 <- cfbd_recruiting_team(year = 2014)
rec15 <- cfbd_recruiting_team(year = 2015)
rec16 <- cfbd_recruiting_team(year = 2016)
rec17 <- cfbd_recruiting_team(year = 2017)
rec18 <- cfbd_recruiting_team(year = 2018)
rec19 <- cfbd_recruiting_team(year = 2019)
rec20 <- cfbd_recruiting_team(year = 2020)
rec21 <- cfbd_recruiting_team(year = 2021)
    
#espn_cfb_schedule(year = 2018) -> sch18 (Not available)
espn_cfb_schedule(year = 2019) -> sch19
espn_cfb_schedule(year = 2020) -> sch20
espn_cfb_schedule(year = 2021) -> sch21

## Year Talent Info
draftDF <- data.frame()
    
for(i in 2015:2021) {
    cfbd_draft_picks(year = i) %>% 
        select(college_team, name, pre_draft_grade) %>% 
        group_by(college_team) %>% 
        summarise_at(vars(pre_draft_grade), list(sum)) %>% 
        arrange(-pre_draft_grade) %>% 
        rename(totalGrade = pre_draft_grade) -> temp
    
    temp$year <- i
    
    draftDF <- rbind(draftDF, temp)
}
    
clusteringFPI <- rbind(fpi12, fpi13, fpi14, fpi15, fpi16, fpi17, fpi18, fpi19, fpi20, fpi21)
recruitingFPI <- rbind(rec12, rec13, rec14, rec15, rec16, rec17, rec18, rec19, rec20, rec21)
scheduleClust <- rbind(sch19, sch20, sch21)
# Number of primetime games played will be used to compare teams
scheduleClust %>% 
    filter(broadcast_name == "ESPN" | broadcast_name == "CBS" | 
               broadcast_name == "FOX" | broadcast_name == "ABC") %>% 
    mutate(home_team_id1 = as.numeric(home_team_id), count = 1) %>% 
    group_by(home_team_id) %>% 
    summarise_at(vars(count), list(sum)) -> homePrimetime
    
scheduleClust %>% 
    filter(broadcast_name == "ESPN" | broadcast_name == "CBS" |
               broadcast_name == "FOX" | broadcast_name == "ABC") %>% 
    mutate(home_team_id1 = as.numeric(home_team_id), count = 1) %>% 
    group_by(away_team_id) %>% 
    summarise_at(vars(count), list(sum)) -> awayPrimetime
  
# Counting number of times teams played in primetime
left_join(homePrimetime, awayPrimetime, by = c("home_team_id" = "away_team_id")) %>% 
    mutate(count.y = if_else(is.na(count.y), 0, count.y),
           count.x = if_else(is.na(count.x), 0, count.x)) %>% 
    mutate(games =  count.x + count.y) %>% 
    select(-c(count.x, count.y)) %>% 
    rename(team_id = home_team_id) -> primeTimeGames

# Stadium Capacity can be used to measure school/fan investment
teamInfo %>% 
    select(school, capacity, latitude, longitude) -> info

# What kind of talent do these schools draw?
recruitingFPI %>% 
    select(school = team, points, year) -> recFpi

#How many Coaches have these schools had in the last 12 years?
coachesDf$one <- 1

coachesDf %>% 
    mutate(Coach = paste(first_name, last_name)) %>% 
    # Weird data abnormality, no special significance
    mutate(school = if_else(school == "SOUTH FLORIDA", "South Florida", school)) -> coaches1
    
schools <- unique(coaches1$school)
    
coach_list <- data.frame()
    
for(i in schools) {
    coaches1 %>% 
        filter(school == i) %>% 
        mutate(newCoach = ifelse(Coach == lag(Coach), 0, 1)) -> temp
    
    coach_list <- rbind(coach_list, temp)
}

coach_list %>% 
    filter(year >= 2010) %>% 
    drop_na(newCoach) %>% 
    group_by(school) %>% 
    summarise(Coaches = sum(newCoach)) -> coach_list1

# What kind of NFL talent do these schools produce?
draftDF %>% 
    group_by(college_team, year) %>% 
    summarise_at(vars(totalGrade), list(mean)) %>% 
    mutate(totalGrade = if_else(is.na(totalGrade), 0, totalGrade)) %>% 
    summarise_at(vars(totalGrade), list(mean)) -> draftGradeSums

# Weeks spent in the top 2 after week 9 since 1950
rankings <- data.frame()

for(i in 1950:2022) {
    temp <- cfbd_rankings(year = i)
    
    rankings <- rbind(rankings, temp)
}

rankings %>% 
    filter(rank <= 2 & poll == "AP Top 25" & week > 11) %>% 
    pull(school) %>% 
    table() %>% 
    data.frame() %>% 
    rename(school = 1) -> top1

clusteringFPI %>% 
    select(year, team_id, name, fpi) %>% 
    rename(school = name) %>% 
    left_join(primeTimeGames) %>% 
    mutate(games = if_else(is.na(games), 0, games)) %>% 
    left_join(coach_list1) %>% 
    left_join(info) %>% 
    left_join(recFpi, by = c("school" = "school", "year" = "year")) %>% 
    drop_na(capacity, points, games, fpi) %>% 
    mutate(team_id = as.numeric(team_id), fpi = scale(as.numeric(fpi)), 
           points = scale(as.numeric(points))) %>% 
    group_by(school) %>% 
    summarise_at(vars(team_id, fpi, capacity, longitude, latitude, points, games, Coaches), 
                 list(mean, sd)) %>% 
    select(school, capacity = capacity_fn1, rec_avg = points_fn1, fpi_avg = fpi_fn1, 
           fpi_sd = fpi_fn2, prime_games = games_fn1, 
           coaches = Coaches_fn1, lat = latitude_fn1, long = longitude_fn1) %>% 
    left_join(draftGradeSums, by = c("school" = "college_team")) %>% 
    left_join(top1) %>% 
    mutate(totalGrade = if_else(is.na(totalGrade), 0, totalGrade),
           Freq = if_else(is.na(Freq), 0, as.double(Freq))) %>% 
    drop_na(fpi_sd, lat, long)-> cluster
## Joining, by = "team_id"
## Joining, by = "school"
## Joining, by = "school"
## Joining, by = "school"
cluster %>% 
    mutate(lat = if_else(school == "Hawaii", 32.7157, lat),
           long = if_else(school == "Hawaii", -117.1611 , lat)) %>% 
    mutate(prime_games = scale(prime_games), Freq = scale(Freq), lat = scale(lat),
           long = scale(long), capacity = scale(capacity)) %>%
    select(school, capacity, rec_avg, Freq, fpi_avg, fpi_sd, prime_games, coaches, lat, long) %>% 
    select(-c(rec_avg, fpi_avg, fpi_sd, coaches))-> cluster2

cluster2 %>% pull(school) -> schools
    
# Clustering can only be done with quantitative data, so it's important we keep 
# the names attached somehow, turning tibble to a dataframe

cluster2 %>% 
    select(-school) %>% as.data.frame() -> cluster3
    
row.names(cluster3) = schools

Regression Set-Up

Clustering

Here we use K-means clustering to create distinct program clusters, which help establish program expectations

## Determining Optimal Number of Clusters and Creating Clusters
set.seed(13)

fviz_nbclust(cluster3, FUNcluster = kmeans, )

# 9 clusters is determined to be the optimal amount
k <- kmeans(cluster3, 6, nstart = 50)

We probably need more than 2 different clusters to establish distinct tiers so we go with 6 total clusters, which still creates really distinct clusters for us.

## Visualizing Clusters and Attaching Clusters to our data used in our regression analysis 
fviz_cluster(k, data = cluster3, ggtheme = theme_minimal(), repel = TRUE, 
             palette = "Dark2", xlab = FALSE, ylab = FALSE, main = "Our 6 new 'Pseudo-Conferences'")
## Warning: ggrepel: 68 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

cbind(cluster3, cluster = k$cluster) %>% 
    mutate(school = row.names(cluster3)) %>% 
    as_tibble() %>% 
    select(school, cluster) -> clusters

Preparing Data for Regression Models

Now that we have established some sort of baseline level of expectations for each program, we prepare other data to get scores for each coach in the 3 aforementioned areas.

## Home Game Info
    
gms21 <- cfbd_game_info(year = 2021)
gms19 <- cfbd_game_info(year = 2019)
gms18 <- cfbd_game_info(year = 2018)
gms17 <- cfbd_game_info(year = 2017)
gms16 <- cfbd_game_info(year = 2016)
    
games <- rbind(gms16, gms17, gms18, gms19, gms21)
   
# Gathering teams' average attendance 
games %>% 
    group_by(home_team) %>% 
    summarise_at(vars(attendance), list(mean)) %>% drop_na(attendance) -> attendance
# At this point, I'm not sure if I had already done this but this is me gathering data for the regression models
left_join(coachesDf, talent) %>% 
    mutate(CoachName = paste(first_name, last_name)) %>% 
    select(CoachName, hire_date, school, year, talent) %>% 
    filter(year >= 2015) -> coach_talent
## Joining, by = c("school", "year")
coach_talent %>% 
    right_join(fpi, by = c("school" = "name", "year" = "year")) %>% 
    left_join(teamInfo, by = c("school" = 'school')) %>% 
    left_join(attendance, by = c("school" = "home_team")) %>% 
    left_join(draftDF, by = c("year" = "year", "school" = "college_team")) %>% 
    left_join(clusters) %>% 
    mutate(clusters = as.factor(cluster),
           talent = if_else(is.na(talent), 0, talent)) -> fullCoachData
## Joining, by = "school"

Using Linear Regression to Create Scores

Talent Maximization Score

The idea here is that we are trying to predict a team’s success (measured by FPI) with two variables, the NFL draft talent on a roster as well as the coach. the ‘totalGrade’ variable will be a big contributor, but good coaches will turn this talent into a higher FPI more often.

talentFpiModel <- lm(fpi ~ CoachName + totalGrade, data = fullCoachData)
    
talentFpiModel$coefficients %>% as.data.frame() %>% head(205) %>% slice(-1) -> talFpi_coeff
    
row.names(talFpi_coeff) -> coaches

talFpiCoeff <- tibble(coaches, talFpi_coeff$.)

talFpiCoeff %>% 
    mutate(coaches = substring(coaches, 10)) %>% 
    right_join(fullCoachData, by = c("coaches" = "CoachName")) %>% 
    rename(talFpiCoeff = `talFpi_coeff$.`) %>% 
    arrange(-talFpiCoeff) %>% 
    select(coaches, talFpiCoeff, school, year) %>% 
    group_by(school) %>% 
    #filter(year == 2022) %>% 
    slice(which.max(year)) %>% 
    drop_na(coaches) -> tfd2
    
# Rescaling these coefficients to create a score from 0 for the worst coach at mazimizing NFL talent, to the best coach
tfd2 %>% 
    cbind(talFpiScore = scales::rescale(tfd2$talFpiCoeff, by = c(0,100))*100) %>% 
    select(-talFpiCoeff) -> talFpiData

Player Development Score

Similar methodology as the score above but we want to see which coaches provide the biggest boost to the talent recruited to their schools.

coachModel <- lm(fpi ~ CoachName + talent, data = fullCoachData)
    
coachModel$coefficients %>% as.data.frame() %>% head(205) %>% slice(-1) -> coach_coeff
    
row.names(coach_coeff) -> coaches

coachCoeff <- tibble(coaches, coach_coeff$.)
    
coachCoeff %>% 
    mutate(coaches = substring(coaches, 10)) %>% 
    right_join(fullCoachData, by = c("coaches" = "CoachName")) %>% 
    rename(talentMaxCoeff = `coach_coeff$.`) %>% 
    arrange(-talentMaxCoeff) %>% 
    select(coaches, talentMaxCoeff, school, year, talent) %>% 
    group_by(coaches) -> cd

cd %>% 
    summarise_at(vars(talentMaxCoeff), list(mean)) -> cd1
    
cd %>% 
    left_join(cd1) %>% 
    group_by(school) %>% 
    slice(which.max(year)) %>% 
    drop_na(coaches) -> cd2
## Joining, by = c("coaches", "talentMaxCoeff")
cd2 %>% 
    cbind(talentMaxScore = scales::rescale(cd2$talentMaxCoeff, by = c(0,100))*100) %>% 
    right_join(cfbd_team_info()) %>% 
    unique() -> fullCoachData2
## Joining, by = "school"

Recruiting Score

When we consider who the best recruiters are, we must consider what type of program these schools are, thus we use the cluster variable. We also use attendance as a proxy for fan engagement, which may vary in clusters outside the most elite one or two.

rec17 <- cfbd_recruiting_team(year = 2017)
rec18 <- cfbd_recruiting_team(year = 2018)
rec19 <- cfbd_recruiting_team(year = 2019)
rec20 <- cfbd_recruiting_team(year = 2020)
rec21 <- cfbd_recruiting_team(year = 2021)

recruiting_data <- rbind(rec17,rec18,rec19,rec20,rec21)
    
    
fullCoachData %>% 
    left_join(recruiting_data, by = c("year" = "year", "school" = "team")) %>% 
    drop_na(capacity, attendance) %>% 
    filter(capacity > 0 & attendance > 0) %>% 
    mutate(attendanceRate = attendance/capacity) -> fullCoachData3
    
# Predicting 247 composite recruiting class rankings
talentModel <- lm(points ~ CoachName + attendance + cluster, data = fullCoachData3)
    
talentModel$coefficients %>% as.data.frame() -> talent_coeff
    
row.names(talent_coeff) -> talent

talentCoeff <- tibble(talent, talent_coeff$.)
    
talentCoeff %>% 
    rename(recruitingCoeff = `talent_coeff$.`) %>% 
    filter(grepl("CoachName", talent)) %>% 
    mutate(coaches = substring(talent, 10)) %>% 
    select(-talent) %>% 
    right_join(fullCoachData, by = c("coaches" = "CoachName")) %>% 
    arrange(-recruitingCoeff) %>% 
    select(coaches, recruitingCoeff, school, year, talent) %>% 
    group_by(coaches) -> td

td %>% 
    summarise_at(vars(recruitingCoeff), list(mean)) -> td1
    
td %>% 
    left_join(td1) %>% 
    group_by(school) %>% 
    slice(which.max(year)) %>% 
    drop_na(coaches, recruitingCoeff) -> td2
## Joining, by = c("coaches", "recruitingCoeff")
td2 %>% 
    cbind(recruitingScore = scales::rescale(td2$recruitingCoeff, by = c(0,100))*100) -> talentData2

Compiling Total Scores

## What coaches are still coaching in 2022, who switched jobs?
cfbd_coaches(year = 2022) -> coaches22

coaches22 %>% 
    mutate(name = paste(first_name, last_name)) %>% 
    left_join(teamInfo) %>% 
    select(School = school, Conference = conference, name) -> c22
## Joining, by = "school"
fullCoachData3 %>% 
    left_join(c22, by = c("coaches" = "name")) %>% 
    left_join(talFpiData1) %>% 
    left_join(talentData2) %>%
    ungroup() %>% 
    select(-c(school, conference)) %>% 
    drop_na(School) %>% 
    select(Coach = coaches, School, talentMaxScore, talentToWinsScore = talFpiScore, recruitingScore) %>% 
    mutate(TotalScore = talentMaxScore*0.45 + talentToWinsScore*0.3 + recruitingScore*0.25) %>% 
    unique() %>% 
    arrange(-TotalScore) -> coachesFinal
## Joining, by = c("coaches", "school", "year")
## Joining, by = c("coaches", "school", "year", "talent")

Final Rankings

With that we get our final rankings… Here are my top 50 coaches for the 2022 College Football Season

coachesFinal %>% head(50) %>% select(-c(talentToWinsScore, talentMaxScore, recruitingScore)) %>% 
    print.data.frame()
##                Coach         School TotalScore
## 1         Nick Saban        Alabama   95.44401
## 2        Kirby Smart        Georgia   92.83131
## 3       Dabo Swinney        Clemson   91.90546
## 4           Ryan Day     Ohio State   87.13453
## 5      Lincoln Riley            USC   78.00064
## 6        Brian Kelly            LSU   77.22907
## 7       Jimbo Fisher      Texas A&M   73.11528
## 8    Mario Cristobal          Miami   71.23343
## 9     James Franklin     Penn State   71.03596
## 10      Jim Harbaugh       Michigan   70.50144
## 11  Kyle Whittingham           Utah   67.77052
## 12       Paul Chryst      Wisconsin   67.60723
## 13       Josh Heupel      Tennessee   66.66868
## 14   Steve Sarkisian          Texas   65.36049
## 15       Dave Aranda         Baylor   63.74103
## 16       Gus Malzahn            UCF   63.41658
## 17      Kirk Ferentz           Iowa   63.10057
## 18     Matt Campbell     Iowa State   63.10005
## 19        Mike Gundy Oklahoma State   61.81591
## 20        P.J. Fleck      Minnesota   59.31576
## 21        Chip Kelly           UCLA   59.30991
## 22      Bryan Harsin         Auburn   59.11102
## 23        David Shaw       Stanford   59.10795
## 24       Lane Kiffin       Ole Miss   58.87092
## 25    Pat Fitzgerald   Northwestern   58.50977
## 26       Jeff Hafley Boston College   56.65616
## 27        Jeff Brohm         Purdue   55.42310
## 28    Dana Holgorsen        Houston   54.61399
## 29      Herm Edwards  Arizona State   54.45544
## 30       Sonny Dykes            TCU   54.02118
## 31       Scott Frost       Nebraska   54.01579
## 32        Mack Brown North Carolina   53.95132
## 33      Billy Napier        Florida   53.93470
## 34       Mark Stoops       Kentucky   53.86225
## 35       Sam Pittman       Arkansas   53.48258
## 36      Dave Clawson    Wake Forest   52.70264
## 37       Dave Doeren       NC State   52.45664
## 38       Dino Babers       Syracuse   51.36355
## 39      Luke Fickell     Cincinnati   49.88894
## 40        Neal Brown  West Virginia   49.81804
## 41      Jason Candle         Toledo   49.79845
## 42 Scott Satterfield     Louisville   49.76225
## 43      Willie Fritz         Tulane   49.75831
## 44         Tom Allen        Indiana   49.51678
## 45     Justin Wilcox     California   49.40662
## 46     Kalani Sitake            BYU   47.71799
## 47     Mike Locksley       Maryland   47.24080
## 48     Eli Drinkwitz       Missouri   46.81002
## 49      Shane Beamer South Carolina   44.87334
## 50      Bret Bielema       Illinois   44.72336

Validating Results

cfbd_ratings_elo() %>% 
    left_join(coachesDf, by = c("team" = "school", "year")) %>% 
    mutate(coach = paste(first_name, last_name)) %>% 
    select(year, team, elo, coach) %>% 
    left_join(coachesFinal, by = c("coach" = "Coach")) %>% 
    left_join(clusters, by = c("team" = "school")) %>% 
    drop_na(TotalScore, cluster) %>% 
    mutate(cluster = as.factor(cluster)) -> validate

# Adjusting for program expectations w/cluster variable
validateModel <- lm(elo ~ coach + cluster, data = validate)

data.frame(validate$elo, validateModel$fitted.values) %>% 
    rename(TrueElo = 1, PredElo = 2) %>% 
    ggplot(aes(x = PredElo, y = TrueElo, alpha = 0.8)) +
    geom_point() +
    geom_smooth() +
    stat_regline_equation(label.y = 2300, label.x = 1200, aes(label = ..rr.label..)) +
    theme_light() +
    theme(legend.position="none") +
    ggtitle("How well does my TotalScore Variable predict success?", 
            subtitle = "Here success is measured by ELO for a technical reason that made it more convenient")
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

After comparing our fitted values to the true values, we see that our coach scores are fairly indicative of success.

Next Steps

  • One area this model could be stronger is comparing coaches at group of 5 schools relative to their peers at Power 5 schools, when comparing coaches at smaller schools the cream rises to the top but when compared to the entire FBS pool of coaches some coaches aren’t getting their due (looking at Luke Fickell)

  • I’m working on weighting more recent results more strongly. I think Mack Brown is earning more legacy points than he is for his current body of work

  • I’m working on creating an objective way of identifying “exceptions to the rule” within clusters. Schools like Northwestern and Stanford have different recruiting strategies that their coaches shouldn’t shoulder the blame for. The service academies don’t have really any draft talent given their commitments, so how else can measure their roster’s talent apart from the team’s overall sucess.

Final Note

At this point this draft is pretty outdated. Results will vary from my current draft and even some methodoology to some degree. But for the most part this covers how I went about these rankings and I’ll continue to circle back to update whatever may need updating as the season progresses.