# IMPORT LIBRARIES
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.5.2
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 3.5.2
## -- Attaching packages --------------------------------------- tidyverse 1.2.1 --
## v tibble  2.0.1     v purrr   0.2.5
## v tidyr   0.8.2     v dplyr   0.7.8
## v readr   1.3.1     v stringr 1.3.1
## v tibble  2.0.1     v forcats 0.3.0
## Warning: package 'tibble' was built under R version 3.5.2
## Warning: package 'tidyr' was built under R version 3.5.2
## Warning: package 'readr' was built under R version 3.5.2
## Warning: package 'purrr' was built under R version 3.5.2
## Warning: package 'dplyr' was built under R version 3.5.2
## Warning: package 'stringr' was built under R version 3.5.2
## Warning: package 'forcats' was built under R version 3.5.2
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
#Faceting : One way to add additional variables to a plot, particularly useful for CATEGORICAL VARIABLES, is to split the plot into facets, i.e. subplots that each display one subset of the data.
#FACET WRAP : 
#To facet your plot by a single variable, use facet_wrap(). 
#The first argument of facet_wrap() should be a formula, which you create with ~ followed by a variable name (here “formula” is the name of a data structure in R, not a synonym for “equation”). 
#Note : The variable that you pass to facet_wrap() should be discrete.
#Scatterplot of hwy against displ, split into one panel per value of class
#and arranged in two rows.
ggplot(mpg, aes(x = displ, y = hwy)) +
  geom_point() +
  facet_wrap(~ class, nrow = 2)

#Facet Grid :
#To facet your plot on the combination of two variables, add facet_grid() to your plot call. 
#The first argument of facet_grid() is also a formula. 
#This time the formula should contain two variable names separated by a ~.
#Scatterplot of hwy against displ in a grid of panels:
#one row per value of drv, one column per value of cyl.
ggplot(mpg, aes(x = displ, y = hwy)) +
  geom_point() +
  facet_grid(drv ~ cyl)

#If you prefer to not facet in the rows or columns dimension, use a . instead of a variable name, e.g. + facet_grid(. ~ cyl).
#EXERCISE
#What happens if you facet on a continuous variable?
#Your graph will not make much sense. R will try to draw a separate facet for each unique value of the continuous variable. If you have too many unique values, you may crash R.
#Faceting on the continuous variable displ: facet_wrap() draws one panel
#for every distinct displ value.
ggplot(mpg, aes(x = drv, y = cyl)) +
  geom_point() +
  facet_wrap(~ displ)

#What do the empty cells in plot with facet_grid(drv ~ cyl) mean? How do they relate to this plot?

#Unfaceted reference plot: cyl against drv.
ggplot(mpg, aes(x = drv, y = cyl)) +
  geom_point()

#The same mapping, split into a grid with one row per drv value
#and one column per cyl value.
ggplot(mpg, aes(x = drv, y = cyl)) +
  geom_point() +
  facet_grid(drv ~ cyl)

#Empty cells mean there are no observations in the data that have that unique combination of values. 
#For instance, in this plot we can determine that there are no vehicles with 5 cylinders that are also 4 wheel drive vehicles. 
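#The faceted plot carries the same information as the unfaceted one: every non-empty facet corresponds to a (drv, cyl) position that has points in the unfaceted plot, and every empty facet to a position that has none.
#Because all points inside a facet share the same drv and cyl values, they overlap, so each facet only appears to have a single data point.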
#What plots does the following code make? What does . do?
#drv on the rows and no column variable: one panel per drv value,
#stacked vertically.
ggplot(mpg, aes(x = displ, y = hwy)) +
  geom_point() +
  facet_grid(drv ~ .)

#No row variable and cyl on the columns: one panel per cyl value,
#placed side by side.
ggplot(mpg, aes(x = displ, y = hwy)) +
  geom_point() +
  facet_grid(. ~ cyl)

#. acts as a placeholder for no variable. In facet_grid(), this results in a plot faceted on a single dimension (1 by N or N by 1) rather than an M by N grid.
#Take the first faceted plot in this section:
#hwy against displ with class shown as separate panels (two rows).
ggplot(mpg, aes(x = displ, y = hwy)) +
  geom_point() +
  facet_wrap(~ class, nrow = 2)

#What are the advantages to using faceting instead of the colour aesthetic? What are the disadvantages? How might the balance change if you had a larger dataset?
#hwy against displ in a single panel, with class shown through point colour.
ggplot(mpg, aes(x = displ, y = hwy, color = class)) +
  geom_point()

#Faceting splits the data into separate grids and better visualizes trends within each individual facet. 
#The disadvantage is that by doing so, it is harder to visualize the overall relationship across facets. 
#The color aesthetic is fine when your dataset is small, but with larger datasets points may begin to overlap with one another. 
#In this situation with a colored plot, jittering may not be sufficient because of the additional color aesthetic.
#Read ?facet_wrap. What does nrow do? What does ncol do? What other options control the layout of the individual panels? Why doesn’t facet_grid() have nrow and ncol arguments?
#nrow sets how many rows the faceted plot will have.
#ncol sets how many columns the faceted plot will have.
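#as.table determines the starting facet to begin filling the plot, and dir determines the starting direction for filling in the plot (horizontal or vertical).
#facet_grid() does not have nrow and ncol arguments because the number of rows and columns is already fixed by the number of unique values of the row and column faceting variables.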
#When using facet_grid() you should usually put the variable with more unique levels in the columns. Why?
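#This will extend the plot horizontally, where you typically have more viewing space (most screens and pages are wider than they are tall). 
#If you put that variable in the rows instead, the plot extends vertically, and each panel is compressed in height and harder to view.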
#trans on the rows and drv on the columns.
ggplot(mpg, aes(x = displ, y = hwy)) +
  geom_point() +
  facet_grid(trans ~ drv)

#drv on the rows and trans on the columns.
ggplot(mpg, aes(x = displ, y = hwy)) +
  geom_point() +
  facet_grid(drv ~ trans)