Load Libraries

library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.2     ✔ tibble    3.3.0
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.1.0     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(dplyr)

Load Data

# Read the game logs with an explicit column specification.
# The spec has to be handed to read_csv() through `col_types`; written as a
# separate statement it only creates an unused variable and the file is still
# parsed with guessed types. Columns not named here are still guessed.
logs25 <- read_csv(
  "logs25.csv",
  col_types = cols(
    Game = col_character(),
    # Complete year-month-day format (the original string stopped at "%").
    Date = col_date(format = "%Y-%m-%d")
  )
)

Bar Chart DF

# One row per team: summed offensive, total and defensive rebounds over all
# rows whose Conference is "ACC MBB".
acc_rebs <- logs25 %>%
  filter(Conference == "ACC MBB") %>%
  group_by(Team) %>%
  summarise(
    SeasonOffRebounds   = sum(TeamOffRebounds),
    SeasonTotalRebounds = sum(TeamTotalRebounds),
    SeasonDefRebounds   = sum(TeamDefRebounds),
    # Spell out the default for a single grouping column: result is ungrouped.
    .groups = "drop"
  )

Pivot Longer

# Reshape to long form: every Season* column becomes a (Type, Rebounds) pair,
# giving one row per team per rebound type.
acc_rebs_longer <- pivot_longer(
  acc_rebs,
  cols = starts_with("Season"),
  names_to = "Type",
  values_to = "Rebounds"
)

First Bars

# Stacked horizontal bars: offensive + defensive rebounds per team.
# The total is filtered out so it is not stacked on top of its own parts.
acc_rebs_longer %>%
  filter(Type != "SeasonTotalRebounds") %>%
  ggplot(aes(x = Rebounds, y = reorder(Team, Rebounds), fill = Type)) +
  geom_col()

Position Dodge

# Same data as the stacked chart, but the two rebound types are drawn
# side by side for each team instead of on top of each other.
acc_rebs_longer %>%
  filter(Type != "SeasonTotalRebounds") %>%
  ggplot(aes(x = Rebounds, y = reorder(Team, Rebounds), fill = Type)) +
  geom_col(position = "dodge")

Ratios

# Proportional bars: position = "fill" scales every team's bar to the same
# length, so the chart shows the offensive/defensive split, not raw counts.
acc_rebs_longer %>%
  filter(Type != "SeasonTotalRebounds") %>%
  ggplot(aes(x = Rebounds, y = reorder(Team, Rebounds), fill = Type)) +
  geom_col(position = "fill")

Small Multiples

# Small multiples: one panel per team, one bar per rebound type.
# The x-axis tick labels are blanked because the fill legend already
# identifies each bar.
acc_rebs_longer %>%
  filter(Type != "SeasonTotalRebounds") %>%
  ggplot(aes(x = Type, y = Rebounds, fill = Type)) +
  geom_col() +
  facet_wrap(vars(Team)) +
  theme(axis.text.x = element_blank())

Line and Scatter DF

# Per-game rebound columns for the rows whose Conference is "ACC MBB";
# shared input for the line charts, scatterplots and the regression below.
line_and_scatter <- logs25 %>%
  filter(Conference == "ACC MBB") %>%
  select(Date, Team, TeamOffRebounds, TeamDefRebounds, TeamTotalRebounds)

Line Chart 1

# North Carolina's total rebounds game by game, with a straight-line
# (linear model) trend and no confidence band.
line_and_scatter %>%
  filter(Team == "North Carolina") %>%
  ggplot(aes(x = Date, y = TeamTotalRebounds)) +
  geom_line() +
  geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula = 'y ~ x'

Line Chart 2

# Rows for the highlighted team only.
UNC <- filter(line_and_scatter, Team == "North Carolina")

# Every team as a grey background line, North Carolina drawn on top in
# colour, plus one linear trend fitted across all teams (the smooth layer
# has no group aesthetic, so it is a single line).
ggplot(mapping = aes(x = Date, y = TeamTotalRebounds)) +
  geom_line(
    data = line_and_scatter,
    mapping = aes(group = Team),
    color = "grey"
  ) +
  geom_line(
    data = UNC,
    mapping = aes(group = Team),
    color = "#4B9CD3"
  ) +
  geom_smooth(
    data = line_and_scatter,
    method = "lm",
    se = FALSE
  )
## `geom_smooth()` using formula = 'y ~ x'

Step Chart

# Running total of rebounds per team over the season.
steps <- logs25 %>%
  filter(Conference == "ACC MBB") %>%
  select(
    Date,
    Team,
    TeamDefRebounds,
    TeamOffRebounds,
    TeamTotalRebounds
  ) %>%
  # cumsum() accumulates in row order and geom_step() draws in row order,
  # so each team's games must be in date order before accumulating.
  arrange(Team, Date) %>%
  group_by(Team) %>%
  mutate(CumulativeRebs = cumsum(TeamTotalRebounds)) %>%
  # Drop the grouping so later verbs on `steps` are not silently per-team.
  ungroup()

# Cumulative rebounds over the season, one coloured step line per team.
ggplot(
  data = steps,
  mapping = aes(x = Date, y = CumulativeRebs, group = Team, color = Team)
) +
  geom_step()

Step Chart 2 (Homework)

# Cumulative-rebound rows for the highlighted team only.
UNCSteps <- filter(steps, Team == "North Carolina")

# All teams as grey step lines for context, with North Carolina drawn
# last so its coloured line sits on top.
ggplot(mapping = aes(x = Date, y = CumulativeRebs, group = Team)) +
  geom_step(data = steps, color = "grey") +
  geom_step(data = UNCSteps, color = "#4B9CD3")

Scatterplot 1

# Offensive vs. defensive rebounds per game, coloured by team, with a
# separate linear fit (no confidence band) for each team.
ggplot(
  data = line_and_scatter,
  mapping = aes(
    x = TeamOffRebounds,
    y = TeamDefRebounds,
    group = Team,
    color = Team
  )
) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula = 'y ~ x'

Scatterplot 2

# Offensive vs. total rebounds per game, coloured by team, with a
# separate linear fit (no confidence band) for each team.
ggplot(
  data = line_and_scatter,
  mapping = aes(
    x = TeamOffRebounds,
    y = TeamTotalRebounds,
    group = Team,
    color = Team
  )
) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula = 'y ~ x'

Fit

# Simple linear regression of defensive rebounds on offensive rebounds,
# pooled over every row of line_and_scatter (no per-team term).
fit <- lm(
  formula = TeamDefRebounds ~ TeamOffRebounds,
  data = line_and_scatter
)

# Print coefficients, residual summary and R-squared.
summary(fit)
## 
## Call:
## lm(formula = TeamDefRebounds ~ TeamOffRebounds, data = line_and_scatter)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -14.0235  -3.2438  -0.1337   3.3155  17.0866 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     22.14225    0.55110  40.178   <2e-16 ***
## TeamOffRebounds  0.11016    0.05824   1.891   0.0591 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.916 on 599 degrees of freedom
## Multiple R-squared:  0.005937,   Adjusted R-squared:  0.004277 
## F-statistic: 3.577 on 1 and 599 DF,  p-value: 0.05906