1. Loading in the tidyverse

library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5     v purrr   0.3.4
## v tibble  3.1.1     v dplyr   1.0.6
## v tidyr   1.1.3     v stringr 1.4.0
## v readr   1.4.0     v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()

2. Load in the data

# Load the mpg dataset into the workspace (mpg is an example dataset shipped with ggplot2).
data("mpg")

### Make a simple scatterplot

# Basic scatterplot: displ on the x-axis, hwy on the y-axis.
ggplot(mpg, mapping = aes(x = displ, y = hwy)) +
  geom_point()

### Run ggplot

# No aesthetics and no geom layer are supplied, so this only draws an
# empty plot background.
ggplot(mpg)

### Structure of mpg

str(mpg)   # mpg has 234 rows and 11 columns
## tibble [234 x 11] (S3: tbl_df/tbl/data.frame)
##  $ manufacturer: chr [1:234] "audi" "audi" "audi" "audi" ...
##  $ model       : chr [1:234] "a4" "a4" "a4" "a4" ...
##  $ displ       : num [1:234] 1.8 1.8 2 2 2.8 2.8 3.1 1.8 1.8 2 ...
##  $ year        : int [1:234] 1999 1999 2008 2008 1999 1999 2008 1999 1999 2008 ...
##  $ cyl         : int [1:234] 4 4 4 4 6 6 6 4 4 4 ...
##  $ trans       : chr [1:234] "auto(l5)" "manual(m5)" "manual(m6)" "auto(av)" ...
##  $ drv         : chr [1:234] "f" "f" "f" "f" ...
##  $ cty         : int [1:234] 18 21 20 21 16 18 18 18 16 20 ...
##  $ hwy         : int [1:234] 29 29 31 30 26 26 27 26 25 28 ...
##  $ fl          : chr [1:234] "p" "p" "p" "p" ...
##  $ class       : chr [1:234] "compact" "compact" "compact" "compact" ...

### The drv variable

?mpg    # drv: the type of drive train; f = front-wheel drive, r = rear-wheel drive, 4 = 4wd
## starting httpd help server ... done

### Scatterplot of hwy vs cyl

# hwy plotted against cyl (cyl is stored as an integer column).
ggplot(mpg, mapping = aes(x = cyl, y = hwy)) +
  geom_point()

### scatterplot of class vs drv

# Both drv and class are character columns, so every row that shares the
# same (drv, class) pair is drawn on exactly the same spot.
ggplot(mpg, mapping = aes(x = drv, y = class)) +
  geom_point()

## 3. Aesthetic Mappings

### A. Color

# Map color to class: each class gets its own point color and a legend.
ggplot(
  mpg,
  mapping = aes(x = displ, y = hwy, color = class)
) +
  geom_point()

# Set (not map) the color: placed outside aes(), "blue" is applied as a
# fixed color to every point.
ggplot(mpg, mapping = aes(x = displ, y = hwy)) +
  geom_point(color = "blue")

### B. Transparency

# Transparency is a poor fit for a discrete variable because it appears to
# imply an ordering among the categories (ggplot2 warns about this).
ggplot(
  mpg,
  mapping = aes(x = displ, y = hwy, alpha = class)
) +
  geom_point()
## Warning: Using alpha for a discrete variable is not advised.

# Fixed transparency for all points: darker spots indicate several points
# stacked on top of each other.
ggplot(mpg, mapping = aes(x = displ, y = hwy)) +
  geom_point(alpha = 0.2)

### C. Shape

# The default shape palette only covers 6 discrete values, but class has 7,
# so the rows of one class are dropped from the plot. Shape is not an
# effective encoding once there are more than 6 values.
ggplot(
  mpg,
  mapping = aes(x = displ, y = hwy, shape = class)
) +
  geom_point()
## Warning: The shape palette can deal with a maximum of 6 discrete values because
## more than 6 becomes difficult to discriminate; you have 7. Consider
## specifying shapes manually if you must have them.
## Warning: Removed 62 rows containing missing values (geom_point).

### D. Size

# Size also implies an order, so it is not a good choice for a discrete
# variable such as class (ggplot2 warns about this).
ggplot(
  mpg,
  mapping = aes(x = displ, y = hwy, size = class)
) +
  geom_point()
## Warning: Using size for a discrete variable is not advised.

# NOTE(review): color = "blue" sits INSIDE aes(), so ggplot2 treats it as a
# constant data value mapped through the color scale rather than as a literal
# color. The points are drawn in the scale's default color with a legend
# entry labelled "blue"; they are not blue. This looks like a deliberate
# demonstration of that pitfall -- to actually get blue points, pass
# color = "blue" to geom_point() outside aes().
ggplot(mpg) +
  geom_point(
    mapping = aes(x = displ, y = hwy, color = "blue")
  )

# Print the column types of mpg again (cty, used in the plots that follow,
# is an integer column).
str(mpg)
## tibble [234 x 11] (S3: tbl_df/tbl/data.frame)
##  $ manufacturer: chr [1:234] "audi" "audi" "audi" "audi" ...
##  $ model       : chr [1:234] "a4" "a4" "a4" "a4" ...
##  $ displ       : num [1:234] 1.8 1.8 2 2 2.8 2.8 3.1 1.8 1.8 2 ...
##  $ year        : int [1:234] 1999 1999 2008 2008 1999 1999 2008 1999 1999 2008 ...
##  $ cyl         : int [1:234] 4 4 4 4 6 6 6 4 4 4 ...
##  $ trans       : chr [1:234] "auto(l5)" "manual(m5)" "manual(m6)" "auto(av)" ...
##  $ drv         : chr [1:234] "f" "f" "f" "f" ...
##  $ cty         : int [1:234] 18 21 20 21 16 18 18 18 16 20 ...
##  $ hwy         : int [1:234] 29 29 31 30 26 26 27 26 25 28 ...
##  $ fl          : chr [1:234] "p" "p" "p" "p" ...
##  $ class       : chr [1:234] "compact" "compact" "compact" "compact" ...
# Map color to the numeric column cty.
ggplot(
  mpg,
  mapping = aes(x = displ, y = hwy, color = cty)
) +
  geom_point()

# Map transparency to the numeric column cty.
ggplot(
  mpg,
  mapping = aes(x = displ, y = hwy, alpha = cty)
) +
  geom_point()

# Map the same numeric column (cty) to two aesthetics at once: color and size.
ggplot(
  mpg,
  mapping = aes(x = displ, y = hwy, color = cty, size = cty)
) +
  geom_point()

## PART 1: tidyverse