In this exercise we will recreate a graphic that shows, for each position on the basketball court, the average points scored from that position and the frequency with which shots are taken from it.
These data represent the LA Lakers 2009/2010 season and come from http://www.basketballgeek.com. We are going to access the data from my GitHub repository.
# Download the shot summary CSV; its first row holds the column names
shots_sum <- read.csv(
  file = "https://raw.githubusercontent.com/kitadasmalley/DATA502/main/FALL2021/Data/shots_sum.csv",
  header = TRUE
)
# Preview the first few rows
head(x = shots_sum)
## x y num_shots num_made prop_made avg_points total_points
## 1 0 4 2 1 0.5 1.5 3
## 2 0 5 3 0 0.0 0.0 0
## 3 0 6 1 0 0.0 0.0 0
## 4 0 7 1 0 0.0 0.0 0
## 5 0 9 2 0 0.0 0.0 0
## 6 0 10 1 0 0.0 0.0 0
library(tidyverse)
# Step 1: Scatterplot
# Draw one point for every (x, y) row in shots_sum
ggplot(data = shots_sum, mapping = aes(x = x, y = y)) +
  geom_point()
# Step 2: Add color
# A variable can only drive an aesthetic when it is mapped inside aes()
ggplot(data = shots_sum, mapping = aes(x = x, y = y)) +
  geom_point(mapping = aes(colour = avg_points))
## Warning: Removed 1 rows containing missing values (geom_point).
# Step 3: Size bubbles
# Point size follows num_shots; point color still follows avg_points
ggplot(data = shots_sum, mapping = aes(x = x, y = y)) +
  geom_point(mapping = aes(colour = avg_points, size = num_shots))
## Warning: Removed 1 rows containing missing values (geom_point).
It's messing up the sizing!
Let’s explore:
# List the rows that have more than 3000 shots
subset(x = shots_sum, subset = num_shots > 3000)
## x y num_shots num_made prop_made avg_points total_points
## 665 25 6 3602 2138 0.5935591 1.187118 4276
# Step 4: Subset the data to remove the "outlier"
# Keep only the rows with at most 3000 shots
shots_sub <- subset(x = shots_sum, subset = num_shots <= 3000)
# Check how many rows and columns remain
dim(x = shots_sub)
## [1] 1364 7
# This can also be done with dplyr::filter(), which keeps the rows where the
# condition is TRUE (the same rows as the subset() call)
shots_sub <- dplyr::filter(shots_sum, num_shots <= 3000)
# Graph again, this time from the subset without the outlier
ggplot(data = shots_sub, mapping = aes(x = x, y = y)) +
  geom_point(mapping = aes(colour = avg_points, size = num_shots))
## Warning: Removed 1 rows containing missing values (geom_point).
# Step 5: Limit the y-axis
# Plot shots_sub (not shots_sum) so the outlier removed in Step 4 stays out.
# ylim(0, 35) drops points whose y falls outside 0-35, which is what the
# "Removed ... rows" warning reports.
ggplot(shots_sub, aes(x, y)) +
  geom_point(aes(color = avg_points, size = num_shots)) +
  ylim(0, 35)
## Warning: Removed 54 rows containing missing values (geom_point).
# Step 6: x and y distances should be drawn on the same scale
# Plot shots_sub (not shots_sum) so the outlier removed in Step 4 stays out.
# coord_equal() makes one unit on x as long as one unit on y.
ggplot(shots_sub, aes(x, y)) +
  geom_point(aes(color = avg_points, size = num_shots)) +
  ylim(0, 35) +
  coord_equal()
## Warning: Removed 54 rows containing missing values (geom_point).
# Step 7: Change color from green to red
# Plot shots_sub (not shots_sum) so the outlier removed in Step 4 stays out.
# The "RdYlGn" ColorBrewer palette replaces the default continuous color scale.
ggplot(shots_sub, aes(x, y)) +
  geom_point(aes(color = avg_points, size = num_shots)) +
  scale_colour_distiller(palette = "RdYlGn") +
  ylim(0, 35) +
  coord_equal()
## Warning: Removed 54 rows containing missing values (geom_point).
# Step 8: Change size scale
# Plot shots_sub (not shots_sum) so the outlier removed in Step 4 stays out.
# Sizes are square-root transformed and mapped onto the range 0.1 to 5.
ggplot(shots_sub, aes(x, y)) +
  geom_point(aes(color = avg_points, size = num_shots)) +
  scale_colour_distiller(palette = "RdYlGn") +
  scale_size(trans = "sqrt", range = c(0.1, 5)) +
  ylim(0, 35) +
  coord_equal()
## Warning: Removed 54 rows containing missing values (geom_point).
# Step 9: Polishing
# Plot shots_sub (not shots_sum) so the outlier removed in Step 4 stays out.
# Start from theme_classic() at base size 18, then blank out every axis
# element (ticks, text, line, title).
ggplot(shots_sub, aes(x, y)) +
  geom_point(aes(color = avg_points, size = num_shots)) +
  scale_colour_distiller(palette = "RdYlGn") +
  scale_size(trans = "sqrt", range = c(0.1, 5)) +
  ylim(0, 35) +
  coord_equal() +
  theme_classic(base_size = 18) +
  theme(
    axis.ticks = element_blank(),
    axis.text = element_blank(),
    axis.line = element_blank(),
    axis.title = element_blank()
  )
## Warning: Removed 54 rows containing missing values (geom_point).