Part 1: Learning about your data

Step 1: load the data

# Attach the tidyverse, then bring the diamonds data set into the workspace.
library(tidyverse)
data(diamonds)

Step 2: learn about your data

# Compact overview of every column: its type and its first few values.
utils::str(diamonds)
## tibble [53,940 x 10] (S3: tbl_df/tbl/data.frame)
##  $ carat  : num [1:53940] 0.23 0.21 0.23 0.29 0.31 0.24 0.24 0.26 0.22 0.23 ...
##  $ cut    : Ord.factor w/ 5 levels "Fair"<"Good"<..: 5 4 2 4 2 3 3 3 1 3 ...
##  $ color  : Ord.factor w/ 7 levels "D"<"E"<"F"<"G"<..: 2 2 2 6 7 7 6 5 2 5 ...
##  $ clarity: Ord.factor w/ 8 levels "I1"<"SI2"<"SI1"<..: 2 3 5 4 2 6 7 3 4 5 ...
##  $ depth  : num [1:53940] 61.5 59.8 56.9 62.4 63.3 62.8 62.3 61.9 65.1 59.4 ...
##  $ table  : num [1:53940] 55 61 65 58 58 57 57 55 61 61 ...
##  $ price  : int [1:53940] 326 326 327 334 335 336 336 337 337 338 ...
##  $ x      : num [1:53940] 3.95 3.89 4.05 4.2 4.34 3.94 3.95 4.07 3.87 4 ...
##  $ y      : num [1:53940] 3.98 3.84 4.07 4.23 4.35 3.96 3.98 4.11 3.78 4.05 ...
##  $ z      : num [1:53940] 2.43 2.31 2.31 2.63 2.75 2.48 2.47 2.53 2.49 2.39 ...

Part 2: Basic scatterplot

# Basic scatterplot: carat on the x-axis, price on the y-axis.
ggplot(diamonds, aes(x = carat, y = price)) +
  geom_point()

Questions

Price vs depth

# Scatterplot of price (y) against depth (x).
ggplot(diamonds, aes(x = depth, y = price)) +
  geom_point()

Cut vs Clarity

# Scatterplot of cut (y) against clarity (x). Both are ordered factors
# (5 and 8 levels), so every diamond is drawn at one of the level
# combinations and the points sit on top of each other.
ggplot(diamonds, aes(x = clarity, y = cut)) +
  geom_point()

Part 3: Aesthetic Mapping

a) Color

# Map point color to clarity (an ordered factor).
ggplot(diamonds, aes(x = carat, y = price, color = clarity)) +
  geom_point()

Unique colors (hue for nominal data)

# Convert clarity to character first so the ordering is dropped and it is
# colored as a plain (nominal) categorical variable.
ggplot(
  diamonds,
  aes(x = carat, y = price, color = as.character(clarity))
) +
  geom_point()

Saturation gradient for numeric data

# Map point color to depth, a numeric column, giving a continuous gradient.
ggplot(diamonds, aes(x = carat, y = price, color = depth)) +
  geom_point()

Same color everywhere. Anything that changes as a function of a variable (column) goes inside the aesthetic mapping, `aes()`; fixed settings that do not depend on the data stay outside it.

# Color is set to a constant outside aes(), so every point is blue.
ggplot(diamonds, aes(x = carat, y = price)) +
  geom_point(color = "blue")

Transparency

# Constant transparency for all points, set outside aes().
ggplot(diamonds, aes(x = carat, y = price)) +
  geom_point(alpha = 0.2)

# Transparency mapped to clarity, so it varies from level to level.
ggplot(diamonds, aes(x = carat, y = price, alpha = clarity)) +
  geom_point()

Shape

Shape is only appropriate for categorical variables with few levels.
# Point shape mapped to clarity. clarity has 8 levels but the default shape
# palette covers only 6, so the rows in the remaining levels are dropped
# (see the warnings printed below).
ggplot(diamonds, aes(x = carat, y = price, shape = clarity)) +
  geom_point()
## Warning: Using shapes for an ordinal variable is not advised
## Warning: The shape palette can deal with a maximum of 6 discrete values because
## more than 6 becomes difficult to discriminate; you have 8. Consider
## specifying shapes manually if you must have them.
## Warning: Removed 5445 rows containing missing values (geom_point).

Size

# Point size mapped to clarity.
ggplot(diamonds, aes(x = carat, y = price, size = clarity)) +
  geom_point()

Recreate the basketball shot plot

Data

# Download the summarised shot data (one row per court location) and
# preview the first rows.
shots_sum <- read.csv(
  "https://raw.githubusercontent.com/kitadasmalley/DATA502/main/FALL2021/Data/shots_sum.csv",
  header = TRUE
)
head(shots_sum)
##   x  y num_shots num_made prop_made avg_points total_points
## 1 0  4         2        1       0.5        1.5            3
## 2 0  5         3        0       0.0        0.0            0
## 3 0  6         1        0       0.0        0.0            0
## 4 0  7         1        0       0.0        0.0            0
## 5 0  9         2        0       0.0        0.0            0
## 6 0 10         1        0       0.0        0.0            0
# Keep only locations with at most 3000 recorded shots.
shots_sub <- filter(shots_sum, num_shots <= 3000)

# Shot chart: one point per court location, colored by average points and
# sized by number of attempts.
ggplot(shots_sub, aes(x = x, y = y)) +
  geom_point(aes(size = num_shots, color = avg_points)) +
  scale_color_distiller(name = "Points", palette = "RdYlGn") +
  # Square-root transform of the size scale, with point sizes from 0.1 to 5.
  scale_size(name = "Attempts", trans = "sqrt", range = c(0.1, 5)) +
  # Restrict y to 0-35; locations outside this range are removed from the
  # plot (hence the "Removed ... rows" warning).
  ylim(0, 35) +
  # Same unit length on both axes.
  coord_equal() +
  theme_classic() +
  # Strip all axis decoration so only the points remain.
  theme(
    axis.title = element_blank(),
    axis.text = element_blank(),
    axis.ticks = element_blank(),
    axis.line = element_blank()
  )
## Warning: Removed 54 rows containing missing values (geom_point).