Load Libraries
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.2 ✔ tibble 3.3.0
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.1.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(dplyr)
Load Data
# Read the game logs into a tibble; readr guesses every column type here.
logs25 <- read_csv(file = "logs25.csv")
## Rows: 11962 Columns: 60
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (10): Season, GameType, TeamFullName, Opponent, HomeAway, W_L, OT, URL,...
## dbl (49): Game, TeamScore, OpponentScore, TeamFG, TeamFGA, TeamFGPCT, Team3...
## date (1): Date
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Explicit column spec for logs25.csv: keep Game as text and parse Date as a
# year-month-day date. The date format needs the trailing day specifier
# ("%Y-%m-%d"); "%Y-%m-%" is not a complete format string.
# NOTE(review): this spec is only defined here, not applied. It takes effect
# only if passed along, e.g. read_csv("logs25.csv", col_types = col_types).
col_types <- cols(
  Game = col_character(),
  Date = col_date(format = "%Y-%m-%d")
)
Bar Chart DF
# One row per ACC team with its summed offensive, total, and defensive
# rebounds across all of that team's rows in logs25.
acc_rebs <- logs25 %>%
  filter(Conference == "ACC MBB") %>%
  group_by(Team) %>%
  summarise(
    SeasonOffRebounds   = sum(TeamOffRebounds),
    SeasonTotalRebounds = sum(TeamTotalRebounds),
    SeasonDefRebounds   = sum(TeamDefRebounds)
  )
Pivot Longer
# Reshape to long form: every Season* column becomes a (Type, Rebounds) row
# so the rebound category can be mapped to a plot aesthetic.
acc_rebs_longer <- pivot_longer(
  acc_rebs,
  cols = starts_with("Season"),
  names_to = "Type",
  values_to = "Rebounds"
)
First Bars
# Horizontal bars of offensive and defensive rebounds per team. The total
# is filtered out so it is not counted on top of its two components.
acc_rebs_longer %>%
  filter(Type != "SeasonTotalRebounds") %>%
  ggplot(aes(x = Rebounds, y = reorder(Team, Rebounds), fill = Type)) +
  geom_col()

Position Dodge
# Same bars as the first chart, but with the two rebound types drawn
# side by side (position = "dodge") instead of on top of each other.
acc_rebs_longer %>%
  filter(Type != "SeasonTotalRebounds") %>%
  ggplot(aes(x = Rebounds, y = reorder(Team, Rebounds), fill = Type)) +
  geom_col(position = "dodge")

Ratios
# position = "fill" rescales every team's bar to the same length, so the
# chart shows the offensive/defensive share rather than raw counts.
acc_rebs_longer %>%
  filter(Type != "SeasonTotalRebounds") %>%
  ggplot(aes(x = Rebounds, y = reorder(Team, Rebounds), fill = Type)) +
  geom_col(position = "fill")

Small Multiples
# Small multiples: one panel per team with a bar for each rebound type.
# The x-axis tick labels are blanked out; fill color identifies the type.
acc_rebs_longer %>%
  filter(Type != "SeasonTotalRebounds") %>%
  ggplot(aes(x = Type, y = Rebounds, fill = Type)) +
  geom_col() +
  facet_wrap(~ Team) +
  theme(axis.text.x = element_blank())

Line and Scatter DF
# Per-game rebound columns for ACC teams, shared by the line charts,
# scatterplots, and the regression below.
line_and_scatter <- logs25 %>%
  filter(Conference == "ACC MBB") %>%
  select(
    Date,
    Team,
    TeamOffRebounds,
    TeamDefRebounds,
    TeamTotalRebounds
  )
Line Chart 1
# North Carolina's total rebounds by game date, with a straight-line
# (lm) trend drawn without its confidence band.
line_and_scatter %>%
  filter(Team == "North Carolina") %>%
  ggplot(aes(x = Date, y = TeamTotalRebounds)) +
  geom_line() +
  geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula = 'y ~ x'

Line Chart 2
# Subset used to highlight one team against the rest of the conference.
UNC <- filter(line_and_scatter, Team == "North Carolina")

# Every team's line in grey, North Carolina redrawn on top in color, and a
# single lm trend fitted across all rows of line_and_scatter.
ggplot(line_and_scatter, aes(x = Date, y = TeamTotalRebounds)) +
  geom_line(aes(group = Team), color = "grey") +
  geom_line(data = UNC, aes(group = Team), color = "#4B9CD3") +
  geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula = 'y ~ x'

Step Chart
# Running total of rebounds for each ACC team.
# Rows are sorted by Date within each team before cumsum() so the running
# total follows the calendar regardless of the row order in logs25, and the
# result is ungrouped so later verbs on `steps` do not silently run per team.
steps <- logs25 %>%
  filter(Conference == "ACC MBB") %>%
  select(
    Date,
    Team,
    TeamDefRebounds,
    TeamOffRebounds,
    TeamTotalRebounds
  ) %>%
  group_by(Team) %>%
  arrange(Date, .by_group = TRUE) %>%
  mutate(CumulativeRebs = cumsum(TeamTotalRebounds)) %>%
  ungroup()
# Step chart of each team's cumulative rebounds, one colored line per team.
ggplot(
  steps,
  aes(x = Date, y = CumulativeRebs, group = Team, color = Team)
) +
  geom_step()

Step Chart 2 (Homework)
# North Carolina's rows from the cumulative-rebounds table.
UNCSteps <- filter(steps, Team == "North Carolina")

# All teams' cumulative steps in grey, with North Carolina drawn on top in
# the highlight color.
ggplot(steps, aes(x = Date, y = CumulativeRebs, group = Team)) +
  geom_step(color = "grey") +
  geom_step(data = UNCSteps, color = "#4B9CD3")

Scatterplot 1
# Offensive vs. defensive rebounds per game, colored by team, with a
# separate lm trend line (no confidence band) for each team.
ggplot(
  line_and_scatter,
  aes(
    x = TeamOffRebounds,
    y = TeamDefRebounds,
    group = Team,
    color = Team
  )
) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula = 'y ~ x'

Scatterplot 2
# Offensive vs. total rebounds per game, colored by team, with a separate
# lm trend line (no confidence band) for each team.
ggplot(
  line_and_scatter,
  aes(
    x = TeamOffRebounds,
    y = TeamTotalRebounds,
    group = Team,
    color = Team
  )
) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE)
## `geom_smooth()` using formula = 'y ~ x'

Fit
# Simple linear regression of defensive rebounds on offensive rebounds
# over every row of line_and_scatter; summary() prints the coefficient table.
fit <- lm(
  formula = TeamDefRebounds ~ TeamOffRebounds,
  data = line_and_scatter
)
summary(fit)
##
## Call:
## lm(formula = TeamDefRebounds ~ TeamOffRebounds, data = line_and_scatter)
##
## Residuals:
## Min 1Q Median 3Q Max
## -14.0235 -3.2438 -0.1337 3.3155 17.0866
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 22.14225 0.55110 40.178 <2e-16 ***
## TeamOffRebounds 0.11016 0.05824 1.891 0.0591 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.916 on 599 degrees of freedom
## Multiple R-squared: 0.005937, Adjusted R-squared: 0.004277
## F-statistic: 3.577 on 1 and 599 DF, p-value: 0.05906