This workshop previews some basic data visualization techniques
in R using the ggplot2 package. It assumes some prior
knowledge of, and experience with, the tidyverse. For
more extensive notes on data manipulation, see here.
tidyverse

Load the package and the data (this dataset and others can be found here).
library(tidyverse)
# Read the player table from "nba23.csv" (path is relative to the working
# directory). readr prints the guessed column specification on load.
nba <- read_csv(file = "nba23.csv")
## Rows: 679 Columns: 29
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): Player, Pos, Tm
## dbl (26): Age, G, GS, MP, FG, FGA, FG%, 3P, 3PA, 3P%, 2P, 2PA, 2P%, eFG%, FT...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Top five rows for team code "DEN" ranked by AST / TOV, keeping only rows
# where MP / G (presumably minutes per game) is at least 10.
nba %>%
  # Row filter first: the MP / G threshold is only needed as a condition,
  # so it is not stored as a column.
  filter(Tm == "DEN", MP / G >= 10) %>%
  mutate(`AST/TO` = AST / TOV) %>%
  arrange(desc(`AST/TO`)) %>%
  select(Player, Pos, AST, TOV, `AST/TO`) %>%
  slice_head(n = 5)
## # A tibble: 5 × 5
## Player Pos AST TOV `AST/TO`
## <chr> <chr> <dbl> <dbl> <dbl>
## 1 Jamal Murray PG 400 145 2.76
## 2 Nikola Jokić C 678 247 2.74
## 3 Reggie Jackson PG 50 19 2.63
## 4 Bruce Brown SF 268 123 2.18
## 5 Vlatko Čančar PF 80 37 2.16
# Five teams with the lowest three-point percentage, computed from each
# team's summed makes (`3P`) and attempts (`3PA`) rather than by averaging
# the per-player percentages.
nba %>%
  # NOTE(review): the row count suggests the file carries combined "TOT"
  # rows for players who appeared for several teams -- confirm. Such rows
  # are not a team and would otherwise show up as their own group. Using
  # %in% keeps rows with a missing Tm, matching the previous behaviour.
  filter(!Tm %in% "TOT") %>%
  group_by(Tm) %>%
  # summarize() over a single grouping variable returns an ungrouped
  # tibble, so no ungroup() is needed afterwards.
  summarize(
    `3PM` = sum(`3P`),
    `3PA` = sum(`3PA`)
  ) %>%
  mutate(`3P%` = 100 * `3PM` / `3PA`) %>%
  arrange(`3P%`) %>%
  slice_head(n = 5)
## # A tibble: 5 × 4
## Tm `3PM` `3PA` `3P%`
## <chr> <dbl> <dbl> <dbl>
## 1 HOU 856 2619 32.7
## 2 CHO 881 2669 33.0
## 3 TOR 880 2626 33.5
## 4 MIA 980 2852 34.4
## 5 SAS 911 2640 34.5
ggplot2

Documentation for plotting in ggplot2 can be found here.
library(ggplot2)
# Recompute `FG%` on a 0-100 scale from FG and FGA; this overwrites the
# `FG%` column that was read from the file.
df <- mutate(nba, `FG%` = 100 * FG / FGA)

# Scatter plot of PTS (x) against FG% (y), one point per row of `df`,
# coloured by Pos. Size and alpha are fixed values, not mapped to data.
ggplot(df, aes(x = PTS, y = `FG%`, colour = Pos)) +
  geom_point(size = 3, alpha = 0.75) +
  theme_bw()
# Histogram of the `3P` column over every row of `nba`, using 30 bins with
# a dark-blue bar outline.
# NOTE(review): each row is counted once; if a player can appear on more
# than one row (e.g. one per team played for), that player is counted more
# than once here -- confirm against the data.
ggplot(nba, aes(x = `3P`)) +
  geom_histogram(bins = 30, colour = "darkblue") +
  labs(
    title = "Distribution of Player 3PM in the 2022-23 NBA Season",
    x = "3-Point Makes (3PM)",
    y = "Count"
  ) +
  theme_bw()
# Bar chart of team three-point percentage for five selected team codes.
# The percentage is computed from each team's summed `3P` and `3PA`.
nba %>%
  filter(Tm %in% c("GSW", "LAL", "LAC", "SAC", "PHO")) %>%
  group_by(Tm) %>%
  # summarize() over a single grouping variable already returns an
  # ungrouped tibble.
  summarize(
    `3PM` = sum(`3P`),
    `3PA` = sum(`3PA`)
  ) %>%
  mutate(`3P%` = 100 * `3PM` / `3PA`) %>%
  ggplot(aes(x = Tm, y = `3P%`, fill = Tm)) +
  # geom_col() draws bar heights straight from the y values.
  geom_col() +
  # Fix the y axis to 0-50 so the bars are read against a common scale.
  lims(y = c(0, 50)) +
  theme_bw()