Part 1: Learning about your data
Step 1: load the data
# Attach the tidyverse packages used throughout this walkthrough.
library(tidyverse)
# Load the diamonds data set into the workspace.
data(diamonds)
Step 2: Learn about your data
# Print the structure of the data: dimensions, column types, first values.
diamonds %>%
  str()
## tibble [53,940 x 10] (S3: tbl_df/tbl/data.frame)
## $ carat : num [1:53940] 0.23 0.21 0.23 0.29 0.31 0.24 0.24 0.26 0.22 0.23 ...
## $ cut : Ord.factor w/ 5 levels "Fair"<"Good"<..: 5 4 2 4 2 3 3 3 1 3 ...
## $ color : Ord.factor w/ 7 levels "D"<"E"<"F"<"G"<..: 2 2 2 6 7 7 6 5 2 5 ...
## $ clarity: Ord.factor w/ 8 levels "I1"<"SI2"<"SI1"<..: 2 3 5 4 2 6 7 3 4 5 ...
## $ depth : num [1:53940] 61.5 59.8 56.9 62.4 63.3 62.8 62.3 61.9 65.1 59.4 ...
## $ table : num [1:53940] 55 61 65 58 58 57 57 55 61 61 ...
## $ price : int [1:53940] 326 326 327 334 335 336 336 337 337 338 ...
## $ x : num [1:53940] 3.95 3.89 4.05 4.2 4.34 3.94 3.95 4.07 3.87 4 ...
## $ y : num [1:53940] 3.98 3.84 4.07 4.23 4.35 3.96 3.98 4.11 3.78 4.05 ...
## $ z : num [1:53940] 2.43 2.31 2.31 2.63 2.75 2.48 2.47 2.53 2.49 2.39 ...
Part 2: Basic scatterplot
# Price against carat, one point per diamond.
diamonds %>%
  ggplot(mapping = aes(x = carat, y = price)) +
  geom_point()

Questions
Price vs depth
# Price against depth; the mapping is declared on the point layer itself.
ggplot(diamonds) +
  geom_point(aes(x = depth, y = price))

Cut vs Clarity
# Cut against clarity. Both are ordered factors (5 and 8 levels), so the
# points can only fall on the intersections of those levels and overplot.
ggplot(diamonds, aes(clarity, cut)) +
  geom_point()

Part 3: Aesthetic Mapping
a) Color
# Map point colour to the ordered factor clarity.
diamonds %>%
  ggplot(aes(carat, price, colour = clarity)) +
  geom_point()

Unique colors (hue for nominal data)
# Convert clarity to character so the colour scale treats it as unordered
# categories rather than as an ordered factor.
ggplot(diamonds, aes(carat, price)) +
  geom_point(aes(colour = as.character(clarity)))

Saturation gradient for numeric data
# Map point colour to the numeric column depth (continuous colour scale).
ggplot(diamonds) +
  geom_point(aes(carat, price, colour = depth))

Same color everywhere. Anything that changes as a function of a variable (column) belongs inside the aesthetic mapping, aes(); settings that do not depend on the data stay outside it.
# A constant colour is not a mapping: it is set on the layer, outside aes().
diamonds %>%
  ggplot(aes(carat, price)) +
  geom_point(colour = "blue")

Transparency
# Fixed transparency for every point, set outside aes().
ggplot(diamonds, aes(carat, price)) +
  geom_point(alpha = 0.2)

# Transparency mapped to clarity, so it varies with the data.
ggplot(diamonds) +
  geom_point(aes(x = carat, y = price, alpha = clarity))

Shape
Shape is only appropriate for categorical variables when we have few levels
# Point shape mapped to clarity. clarity has 8 levels, which is more than
# the default shape palette handles, hence the warnings printed below.
diamonds %>%
  ggplot(aes(carat, price, shape = clarity)) +
  geom_point()
## Warning: Using shapes for an ordinal variable is not advised
## Warning: The shape palette can deal with a maximum of 6 discrete values because
## more than 6 becomes difficult to discriminate; you have 8. Consider
## specifying shapes manually if you must have them.
## Warning: Removed 5445 rows containing missing values (geom_point).

Size
# Point size mapped to clarity.
ggplot(diamonds, aes(carat, price)) +
  geom_point(aes(size = clarity))

Recreate the basketball shot plot
Data
# Read the shot summary table from the course repository; the first line of
# the CSV holds the column names.
shots_sum <- read.csv(
  file = "https://raw.githubusercontent.com/kitadasmalley/DATA502/main/FALL2021/Data/shots_sum.csv",
  header = TRUE
)
# Preview the first six rows.
head(shots_sum, n = 6L)
## x y num_shots num_made prop_made avg_points total_points
## 1 0 4 2 1 0.5 1.5 3
## 2 0 5 3 0 0.0 0.0 0
## 3 0 6 1 0 0.0 0.0 0
## 4 0 7 1 0 0.0 0.0 0
## 5 0 9 2 0 0.0 0.0 0
## 6 0 10 1 0 0.0 0.0 0
# Keep only the locations with at most 3000 recorded attempts.
shots_sub <- filter(shots_sum, num_shots <= 3000)

# Shot chart: one point per (x, y) location, coloured by average points and
# sized by number of attempts.
ggplot(data = shots_sub, mapping = aes(x = x, y = y)) +
  geom_point(mapping = aes(colour = avg_points, size = num_shots)) +
  scale_colour_distiller(name = "Points", palette = "RdYlGn") +
  # Square-root transform on the size scale, with point sizes from 0.1 to 5.
  scale_size(name = "Attempts", range = c(0.1, 5), trans = "sqrt") +
  # Rows with y outside [0, 35] are dropped from the plot (see warning below).
  ylim(0, 35) +
  # Equal scaling on both axes.
  coord_equal() +
  theme_classic() +
  # Strip every axis decoration so that only the points and legends remain.
  theme(
    axis.title = element_blank(),
    axis.text = element_blank(),
    axis.ticks = element_blank(),
    axis.line = element_blank()
  )
## Warning: Removed 54 rows containing missing values (geom_point).
