GM #3: Application Overview + Data Viz Workshop

Fun times with ggplot2

Kevin Baer

Bruin Sports Analytics President

October 15, 2025

Overview

  • Club Announcements

  • Application Overview

  • Data Viz Exemplar

Club Announcements

  • Slack update
    • When you get in the Slack, toss an introduction in the #introductions channel
  • Applications are open
    • Baseball
    • Basketball
    • Football
    • Tennis
  • Group pitches took place last week — if you have any questions, reach out to the chairs
  • Thank you to all who came to board office hours!

Application Overview

Link to Application: bit.ly/BSA_Fall25_App

Mailroom + Load Data

library(tidyverse)

# Read the application dataset (one directory up) into `db`.
db <- "../BSA_Application_Dataset_25.csv" |>
  read_csv()

# Print a compact overview: each column with its type and first few values.
db |>
  glimpse()
Rows: 18
Columns: 31
$ team_id                                         <dbl> 356, 84, 2294, 120, 12…
$ team                                            <chr> "Illinois", "Indiana",…
$ team_color                                      <chr> "ff5f05", "990000", "0…
$ team_alternate_color                            <chr> "13294b", "edebeb", "f…
$ logo                                            <chr> "https://a.espncdn.com…
$ defensive_avg_defensive_rebounds                <dbl> 26.43333, 26.22581, 27…
$ defensive_avg_blocks                            <dbl> 2.433333, 3.677419, 3.…
$ defensive_avg_steals                            <dbl> 5.400000, 6.419355, 7.…
$ general_assist_turnover_ratio                   <dbl> 1.1823056, 1.1502242, …
$ general_steal_foul_ratio                        <dbl> 0.3681818, 0.3692022, …
$ general_block_foul_ratio                        <dbl> 0.1659091, 0.2115028, …
$ offensive_avg_field_goals_made                  <dbl> 25.36667, 24.93548, 27…
$ offensive_avg_field_goals_attempted             <dbl> 58.30000, 55.22581, 58…
$ offensive_avg_three_point_field_goals_made      <dbl> 5.700000, 7.870968, 7.…
$ offensive_avg_three_point_field_goals_attempted <dbl> 16.53333, 21.45161, 20…
$ offensive_avg_free_throws_made                  <dbl> 14.766666, 12.548388, …
$ offensive_avg_free_throws_attempted             <dbl> 18.60000, 15.90323, 17…
$ offensive_avg_points                            <dbl> 71.20000, 70.29032, 74…
$ offensive_avg_offensive_rebounds                <dbl> 10.333333, 7.032258, 1…
$ offensive_avg_assists                           <dbl> 14.70000, 16.54839, 17…
$ offensive_avg_turnovers                         <dbl> 12.43333, 14.38710, 15…
$ offensive_avg_estimated_possessions             <dbl> 68.30666, 69.45806, 71…
$ offensive_avg_team_turnovers                    <dbl> 0.4666667, 0.5161290, …
$ offensive_avg_total_turnovers                   <dbl> 12.90000, 14.90323, 16…
$ offensive_avg_two_point_field_goals_made        <dbl> 19.66667, 17.06452, 19…
$ offensive_avg_two_point_field_goals_attempted   <dbl> 41.76667, 33.77419, 37…
$ offensive_points_per_estimated_possessions      <dbl> 1.0423580, 1.0119822, …
$ offensive_three_point_field_goal_pct            <dbl> 34.47581, 36.69173, 34…
$ offensive_two_point_field_goal_pct              <dbl> 47.08699, 50.52531, 52…
$ offensive_shooting_efficiency                   <dbl> 0.4839908, 0.5227804, …
$ offensive_scoring_efficiency                    <dbl> 1.221269, 1.272780, 1.…

Topic Selection and Sanity Check

# Keep the team name plus the four per-game scoring columns, under short names.
# Team colors are not used by this check, so they are not selected here.
scoring <- db |>
  select(
    team,
    `3pt` = offensive_avg_three_point_field_goals_made,
    `2pt` = offensive_avg_two_point_field_goals_made,
    FT = offensive_avg_free_throws_made,
    TP = offensive_avg_points
  )

# Sanity check: points rebuilt from made shots (3 per three-pointer, 2 per
# two-pointer, 1 per free throw) should match the reported average points.
scoring |>
  mutate(
    pred_points = `3pt` * 3 + `2pt` * 2 + FT * 1
  ) |>
  select(TP, pred_points) |>
  head(n = 5)

Topic Selection and Sanity Check

# Trim the dataset to the team name and the per-game scoring columns,
# giving each scoring column a short name.
scoring <- select(
  db,
  team,
  `3pt` = offensive_avg_three_point_field_goals_made,
  `2pt` = offensive_avg_two_point_field_goals_made,
  FT = offensive_avg_free_throws_made,
  TP = offensive_avg_points
)

# Rebuild points per game from made shots and show the result next to the
# reported total (TP) for the first five teams.
scoring |>
  mutate(pred_points = 3 * `3pt` + 2 * `2pt` + FT) |>
  select(TP, pred_points) |>
  slice_head(n = 5)
# A tibble: 5 × 2
     TP pred_points
  <dbl>       <dbl>
1  71.2        71.2
2  70.3        70.3
3  74.3        74.3
4  79.9        79.9
5  79.5        79.5

Data Manipulation

# Build the plotting table: prefix both hex codes with "#" (stored as factors)
# and turn each scoring column into its share of total points per game.
scoring <- db |>
  select(
    team,
    team_color,
    team_alternate_color,
    `3pt` = offensive_avg_three_point_field_goals_made,
    `2pt` = offensive_avg_two_point_field_goals_made,
    FT = offensive_avg_free_throws_made,
    TP = offensive_avg_points
  ) |>
  mutate(
    across(
      c(team_color, team_alternate_color),
      \(hex) as_factor(paste0("#", hex))
    ),
    # Made shots are weighted by their point value before dividing by TP.
    `3pt` = 3 * `3pt` / TP,
    `2pt` = 2 * `2pt` / TP,
    FT = FT / TP
  )
head(scoring, n = 5)

Data Manipulation

# Build the plotting table: prefix both hex codes with "#" (stored as factors)
# and turn each scoring column into its share of total points per game.
scoring <- db |>
  select(
    team,
    team_color,
    team_alternate_color,
    `3pt` = offensive_avg_three_point_field_goals_made,
    `2pt` = offensive_avg_two_point_field_goals_made,
    FT = offensive_avg_free_throws_made,
    TP = offensive_avg_points
  ) |>
  mutate(
    across(
      c(team_color, team_alternate_color),
      \(hex) as_factor(paste0("#", hex))
    ),
    # Made shots are weighted by their point value before dividing by TP.
    `3pt` = 3 * `3pt` / TP,
    `2pt` = 2 * `2pt` / TP,
    FT = FT / TP
  )
head(scoring, n = 5)
# A tibble: 5 × 7
  team        team_color team_alternate_color `3pt` `2pt`    FT    TP
  <chr>       <fct>      <fct>                <dbl> <dbl> <dbl> <dbl>
1 Illinois    #ff5f05    #13294b              0.240 0.552 0.207  71.2
2 Indiana     #990000    #edebeb              0.336 0.486 0.179  70.3
3 Iowa        #000000    #fcd116              0.293 0.535 0.172  74.3
4 Maryland    #D5002B    #ffcd00              0.229 0.559 0.212  79.9
5 Michigan St #18453b    #ffffff              0.262 0.543 0.196  79.5

First Plot

# Step 1: start a plot from the data only, with no mappings or layers yet.
ggplot(data = scoring)

First Plot

# Step 2: map two-point share to x, team to y, and team color to fill.
# There is still no layer, so nothing is drawn inside the panel.
ggplot(scoring, aes(x = `2pt`, y = team, fill = team_color))

First Plot

# Step 3: add a column layer so each team gets a bar for its two-point share.
ggplot(scoring, aes(x = `2pt`, y = team, fill = team_color)) +
  geom_col()

First Plot

# Step 4: use the values in team_color directly as the bar fill colors
# instead of treating them as categories.
ggplot(scoring, aes(x = `2pt`, y = team, fill = team_color)) +
  geom_col() +
  scale_fill_identity()

But now… a problem.

Pivot

# Reshape to long form: one row per team and scoring method, with the method
# name in `method` and its share of points in `pct`.
scoring_2 <- scoring |>
  pivot_longer(c(`3pt`, `2pt`, FT), names_to = "method", values_to = "pct") |>
  # Store `method` as a factor with levels in the order FT, 3pt, 2pt.
  mutate(method = fct_relevel(method, "FT", "3pt", "2pt"))

head(scoring_2, n = 10)
# A tibble: 10 × 6
   team     team_color team_alternate_color    TP method   pct
   <chr>    <fct>      <fct>                <dbl> <fct>  <dbl>
 1 Illinois #ff5f05    #13294b               71.2 3pt    0.240
 2 Illinois #ff5f05    #13294b               71.2 2pt    0.552
 3 Illinois #ff5f05    #13294b               71.2 FT     0.207
 4 Indiana  #990000    #edebeb               70.3 3pt    0.336
 5 Indiana  #990000    #edebeb               70.3 2pt    0.486
 6 Indiana  #990000    #edebeb               70.3 FT     0.179
 7 Iowa     #000000    #fcd116               74.3 3pt    0.293
 8 Iowa     #000000    #fcd116               74.3 2pt    0.535
 9 Iowa     #000000    #fcd116               74.3 FT     0.172
10 Maryland #D5002B    #ffcd00               79.9 3pt    0.229

Try 2

# One bar per team, split into segments colored by scoring method.
ggplot(scoring_2, aes(x = pct, y = team, fill = method)) +
  geom_col()

Try 2

library(ggpattern)

# Stacked bars filled with each team's own color; the scoring method is
# encoded by the pattern drawn on each segment.
ggplot(
  scoring_2,
  aes(x = pct, y = team, fill = team_color, pattern_type = method)
) +
  geom_col_pattern(pattern = "magick", position = "stack") +
  scale_pattern_type_discrete(
    choices = c("verticalbricks", "hexagons", "bricks")
  ) +
  scale_fill_identity()

Try 2

That was hard to read. What if we tried different colors?

# Pick one fill color per bar segment: the team's primary color for 2-pointers,
# a neutral grey for 3-pointers, and the team's alternate color for free throws.
# team_color and team_alternate_color are factors while "grey50" is a string,
# so convert them explicitly and give every case_when() branch the same type
# rather than relying on implicit factor/character coercion.
scoring_3 <- scoring_2 |>
  mutate(
    fill_color = case_when(
      method == "2pt" ~ as.character(team_color),
      method == "3pt" ~ "grey50",
      method == "FT" ~ as.character(team_alternate_color)
    )
  )

# Stacked bars per team, using fill_color directly as the fill.
ggplot(scoring_3, aes(y = team, fill = fill_color)) +
  geom_col(aes(x = pct), position = "stack") +
  scale_fill_identity()

That was hard to read. What if we tried different colors?

Final try

# Mean share for each scoring method: one row per facet. Using this as the
# data for the reference line draws a single line per panel, instead of one
# identical line for every team row.
method_means <- scoring_2 |>
  summarise(mean = mean(pct), .by = method)

# One panel per scoring method, one bar per team in the team's own color,
# with a thick black line marking the mean share for that method.
ggplot(
  scoring_2,
  # fct_rev() reverses the team order on the y axis.
  aes(x = pct, y = fct_rev(team), fill = team_color)
) +
  # Each method gets its own x range so the smaller shares stay readable.
  facet_wrap(~method, scales = "free_x") +
  geom_col() +
  geom_vline(
    data = method_means,
    aes(xintercept = mean),
    color = "black",
    linewidth = 3
  ) +
  scale_fill_identity() +
  theme_bw(base_size = 22, base_family = "Barlow") +
  labs(
    y = NULL, x = NULL,
    title = "Scoring Breakdown - Big 10 WBB",
    caption = "By Kevin Baer"
  ) +
  scale_x_continuous(
    labels = scales::label_percent(),
    expand = expansion(mult = c(0.02, 0.06))
  ) +
  theme(plot.title.position = "plot")

Final try

Thank you!

  • No meeting next week — Good luck on midterms!

  • Applications close Sunday, October 26th at 11:59 pm

  • Interview requests will go out the morning of Wednesday, October 29th