In this exercise we will recreate a graphic that shows, for each position on the basketball court, the average points scored from that position and how frequently shots are taken from it.
These data represent the LA Lakers 2009/2010 season and come from http://www.basketballgeek.com. We are going to access these data from my GitHub repository.
# Read the shot summary (one row per x/y court position) from GitHub,
# then preview the first few rows.
shots_sum <- read.csv(
  file = "https://raw.githubusercontent.com/kitadasmalley/DATA502/main/FALL2021/Data/shots_sum.csv",
  header = TRUE
)
head(shots_sum)
## x y num_shots num_made prop_made avg_points total_points
## 1 0 4 2 1 0.5 1.5 3
## 2 0 5 3 0 0.0 0.0 0
## 3 0 6 1 0 0.0 0.0 0
## 4 0 7 1 0 0.0 0.0 0
## 5 0 9 2 0 0.0 0.0 0
## 6 0 10 1 0 0.0 0.0 0
library(tidyverse)
# Step 1: Scatterplot with one point per (x, y) court position
ggplot(data = shots_sum, mapping = aes(x = x, y = y)) +
  geom_point()
# Step 2: Color each point by the average points scored from that position
ggplot(data = shots_sum, mapping = aes(x = x, y = y)) +
  geom_point(mapping = aes(colour = avg_points))
## Warning: Removed 1 rows containing missing values (geom_point).
# Step 3: Size each bubble by the number of shots taken from that position
ggplot(data = shots_sum, mapping = aes(x = x, y = y)) +
  geom_point(mapping = aes(colour = avg_points, size = num_shots))
## Warning: Removed 1 rows containing missing values (geom_point).
It's messing up the sizing!
Let’s explore:
# Show the position(s) with more than 3000 recorded shots.
subset(shots_sum, subset = num_shots > 3000)
## x y num_shots num_made prop_made avg_points total_points
## 665 25 6 3602 2138 0.5935591 1.187118 4276
# Step 4: Remove the "outlier" by keeping only rows with at most 3000 shots,
# then check the dimensions of the result.
shots_sub <- subset(shots_sum, subset = num_shots <= 3000)
dim(shots_sub)
## [1] 1364 7
# The same subset can also be built with filter()
shots_sub <- filter(shots_sum, num_shots <= 3000)
dim(shots_sub)
## [1] 1364 7
# Draw the bubble plot again, this time from the subset without the outlier
ggplot(data = shots_sub, mapping = aes(x = x, y = y)) +
  geom_point(mapping = aes(colour = avg_points, size = num_shots))
## Warning: Removed 1 rows containing missing values (geom_point).
# Step 5: Limit the y-axis to the range 0 to 35
# (points whose y falls outside this range are removed from the plot)
ggplot(data = shots_sub, mapping = aes(x = x, y = y)) +
  geom_point(mapping = aes(colour = avg_points, size = num_shots)) +
  ylim(0, 35)
## Warning: Removed 54 rows containing missing values (geom_point).
# Step 6: Put x and y on the same scale, so one unit has the same length
# on both axes
ggplot(data = shots_sub, mapping = aes(x = x, y = y)) +
  geom_point(mapping = aes(colour = avg_points, size = num_shots)) +
  ylim(0, 35) +
  coord_equal()
## Warning: Removed 54 rows containing missing values (geom_point).
# Step 7: Switch the color scale to the "RdYlGn" (red-yellow-green) palette
ggplot(data = shots_sub, mapping = aes(x = x, y = y)) +
  geom_point(mapping = aes(colour = avg_points, size = num_shots)) +
  scale_colour_distiller(palette = "RdYlGn") +
  ylim(0, 35) +
  coord_equal()
## Warning: Removed 54 rows containing missing values (geom_point).
# Step 8: Change the size scale: square-root transform, with bubble sizes
# ranging from 0.1 to 5
ggplot(data = shots_sub, mapping = aes(x = x, y = y)) +
  geom_point(mapping = aes(colour = avg_points, size = num_shots)) +
  scale_colour_distiller(palette = "RdYlGn") +
  scale_size(trans = "sqrt", range = c(0.1, 5)) +
  ylim(0, 35) +
  coord_equal()
## Warning: Removed 54 rows containing missing values (geom_point).
# Step 9: Polishing - title the legends, use a larger base font size, and
# blank out the axis titles, text, ticks and lines
ggplot(
  data = shots_sub,
  mapping = aes(x = x, y = y, colour = avg_points, size = num_shots)
) +
  geom_point() +
  scale_colour_distiller(name = "Points", palette = "RdYlGn") +
  scale_size(name = "Attempts", trans = "sqrt", range = c(0.1, 5)) +
  ylim(0, 35) +
  coord_equal() +
  theme_classic(base_size = 18) +
  theme(
    axis.title = element_blank(),
    axis.text = element_blank(),
    axis.ticks = element_blank(),
    axis.line = element_blank()
  )
## Warning: Removed 54 rows containing missing values (geom_point).