# Chapter: ggplot2, Part 1: Scatter Plots

# The ggplot2 package is an implementation of the grammar of graphics for R, written by Hadley Wickham.
# Dataset used: the built-in iris data.frame
## Import Libraries
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.5.2
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 3.5.2
## -- Attaching packages -------------------------------- tidyverse 1.2.1 --
## v tibble  2.0.1     v purrr   0.2.5
## v tidyr   0.8.2     v dplyr   0.7.8
## v readr   1.3.1     v stringr 1.3.1
## v tibble  2.0.1     v forcats 0.3.0
## Warning: package 'tibble' was built under R version 3.5.2
## Warning: package 'tidyr' was built under R version 3.5.2
## Warning: package 'readr' was built under R version 3.5.2
## Warning: package 'purrr' was built under R version 3.5.2
## Warning: package 'dplyr' was built under R version 3.5.2
## Warning: package 'stringr' was built under R version 3.5.2
## Warning: package 'forcats' was built under R version 3.5.2
## -- Conflicts ----------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
# Simple scatter plot: four equivalent spellings of the same chart.

# Variant 1: data passed by name, aesthetics declared in the geom layer.
ggplot(data = iris) +
  geom_point(mapping = aes(x = Petal.Length, y = Sepal.Length)) +
  labs(
    x = "Petal Length",
    y = "Sepal Length",
    title = "Petal Length versus Sepal Length"
  )

# Variant 2: data passed positionally; x is the first positional
# argument of aes().
ggplot(iris) +
  geom_point(mapping = aes(Petal.Length, y = Sepal.Length)) +
  labs(
    x = "Petal Length",
    y = "Sepal Length",
    title = "Petal Length versus Sepal Length"
  )

# Variant 3: aesthetics declared once in ggplot() and inherited by the geom.
ggplot(data = iris, aes(x = Petal.Length, y = Sepal.Length)) +
  geom_point() +
  labs(
    x = "Petal Length",
    y = "Sepal Length",
    title = "Petal Length versus Sepal Length"
  )

# Variant 4: same as variant 3 with data and x passed positionally.
ggplot(iris, aes(Petal.Length, y = Sepal.Length)) +
  geom_point() +
  labs(
    x = "Petal Length",
    y = "Sepal Length",
    title = "Petal Length versus Sepal Length"
  )

# Simple scatter plot with a fixed point colour.
# A constant colour is a layer setting, so it must be passed to geom_point()
# OUTSIDE aes(). Inside aes() the string would be treated as a data value:
# the points would get the default palette colour and a legend instead of
# the requested colour.

# Variant 1: data passed by name, aesthetics declared in the geom layer.
ggplot(data = iris) +
  geom_point(
    mapping = aes(x = Petal.Length, y = Sepal.Length),
    color = "red"
  ) +
  labs(
    x = "Petal Length",
    y = "Sepal Length",
    title = "Petal Length versus Sepal Length"
  )

# Variant 2: data passed positionally.
ggplot(iris) +
  geom_point(
    mapping = aes(x = Petal.Length, y = Sepal.Length),
    color = "blue"
  ) +
  labs(
    x = "Petal Length",
    y = "Sepal Length",
    title = "Petal Length versus Sepal Length"
  )

# Variant 3: aesthetics declared in ggplot(), colour set on the geom.
ggplot(data = iris, aes(x = Petal.Length, y = Sepal.Length)) +
  geom_point(color = "green") +
  labs(
    x = "Petal Length",
    y = "Sepal Length",
    title = "Petal Length versus Sepal Length"
  )

# Variant 4: positional data; `col` is an accepted alias of `color`.
# The constant stays on the geom, not in aes(), so the points really are brown.
ggplot(iris, aes(x = Petal.Length, y = Sepal.Length)) +
  geom_point(col = "brown") +
  labs(
    x = "Petal Length",
    y = "Sepal Length",
    title = "Petal Length versus Sepal Length"
  )

# Scatter plot with the COLOR aesthetic: Species is mapped to point colour,
# so the mapping goes inside aes() and a legend is drawn.

# Variant 1: data passed by name, aesthetics declared in the geom layer.
ggplot(data = iris) +
  geom_point(
    mapping = aes(x = Petal.Length, y = Sepal.Length, color = Species)
  ) +
  labs(
    x = "Petal Length",
    y = "Sepal Length",
    title = "Petal Length versus Sepal Length",
    color = "Species"
  )

# Variant 2: data passed positionally.
ggplot(iris) +
  geom_point(
    mapping = aes(x = Petal.Length, y = Sepal.Length, color = Species)
  ) +
  labs(
    x = "Petal Length",
    y = "Sepal Length",
    title = "Petal Length versus Sepal Length",
    color = "Species"
  )

# Variant 3: aesthetics declared in ggplot() and inherited by the geom.
ggplot(
  data = iris,
  aes(x = Petal.Length, y = Sepal.Length, color = Species)
) +
  geom_point() +
  labs(
    x = "Petal Length",
    y = "Sepal Length",
    title = "Petal Length versus Sepal Length",
    color = "Species"
  )

# Variant 4: positional data, `col` alias, legend title left to its default.
ggplot(iris, aes(x = Petal.Length, y = Sepal.Length, col = Species)) +
  geom_point() +
  labs(
    x = "Petal Length",
    y = "Sepal Length",
    title = "Petal Length versus Sepal Length"
  )

# Scatter plot with the SIZE aesthetic.
# Mapping size to a discrete variable is NOT advised; ggplot2 emits the
# warning shown after each plot.

# Variant 1: data passed by name, aesthetics declared in the geom layer.
ggplot(data = iris) +
  geom_point(
    mapping = aes(x = Petal.Length, y = Sepal.Length, size = Species)
  ) +
  labs(
    x = "Petal Length",
    y = "Sepal Length",
    title = "Petal Length versus Sepal Length",
    size = "Species"
  )
## Warning: Using size for a discrete variable is not advised.

# Variant 2: data passed positionally.
ggplot(iris) +
  geom_point(
    mapping = aes(x = Petal.Length, y = Sepal.Length, size = Species)
  ) +
  labs(
    x = "Petal Length",
    y = "Sepal Length",
    title = "Petal Length versus Sepal Length",
    size = "Species"
  )
## Warning: Using size for a discrete variable is not advised.

# Variant 3: aesthetics declared in ggplot() and inherited by the geom.
ggplot(
  data = iris,
  aes(x = Petal.Length, y = Sepal.Length, size = Species)
) +
  geom_point() +
  labs(
    x = "Petal Length",
    y = "Sepal Length",
    title = "Petal Length versus Sepal Length",
    size = "Species"
  )
## Warning: Using size for a discrete variable is not advised.

# Variant 4: positional data, legend title left to its default.
ggplot(iris, aes(x = Petal.Length, y = Sepal.Length, size = Species)) +
  geom_point() +
  labs(
    x = "Petal Length",
    y = "Sepal Length",
    title = "Petal Length versus Sepal Length"
  )
## Warning: Using size for a discrete variable is not advised.

# Scatter plot with the ALPHA (transparency) aesthetic.
# Mapping alpha to a discrete variable is not advised; ggplot2 emits the
# warning shown after each plot.

# Variant 1: data passed by name, aesthetics declared in the geom layer.
ggplot(data = iris) +
  geom_point(
    mapping = aes(x = Petal.Length, y = Sepal.Length, alpha = Species)
  ) +
  labs(
    x = "Petal Length",
    y = "Sepal Length",
    title = "Petal Length versus Sepal Length",
    alpha = "Species"
  )
## Warning: Using alpha for a discrete variable is not advised.

# Variant 2: data passed positionally.
ggplot(iris) +
  geom_point(
    mapping = aes(x = Petal.Length, y = Sepal.Length, alpha = Species)
  ) +
  labs(
    x = "Petal Length",
    y = "Sepal Length",
    title = "Petal Length versus Sepal Length",
    alpha = "Species"
  )
## Warning: Using alpha for a discrete variable is not advised.

# Variant 3: aesthetics declared in ggplot() and inherited by the geom.
# The label must be spelled `alpha` to reach the alpha legend; labs() does
# not match a misspelt name to any aesthetic.
ggplot(
  data = iris,
  aes(x = Petal.Length, y = Sepal.Length, alpha = Species)
) +
  geom_point() +
  labs(
    x = "Petal Length",
    y = "Sepal Length",
    title = "Petal Length versus Sepal Length",
    alpha = "Species"
  )
## Warning: Using alpha for a discrete variable is not advised.

# Variant 4: positional data, legend title left to its default.
ggplot(iris, aes(x = Petal.Length, y = Sepal.Length, alpha = Species)) +
  geom_point() +
  labs(
    x = "Petal Length",
    y = "Sepal Length",
    title = "Petal Length versus Sepal Length"
  )
## Warning: Using alpha for a discrete variable is not advised.

# Scatter plot with the SHAPE aesthetic.
# The shape scale handles only six discrete values: if the variable has more
# than six levels, the excess levels are not plotted.
# A continuous variable cannot be mapped to shape.

# Variant 1: data passed by name, aesthetics declared in the geom layer.
ggplot(data = iris) +
  geom_point(
    mapping = aes(x = Petal.Length, y = Sepal.Length, shape = Species)
  ) +
  labs(
    x = "Petal Length",
    y = "Sepal Length",
    title = "Petal Length versus Sepal Length",
    shape = "Species"
  )

# Variant 2: data passed positionally.
ggplot(iris) +
  geom_point(
    mapping = aes(x = Petal.Length, y = Sepal.Length, shape = Species)
  ) +
  labs(
    x = "Petal Length",
    y = "Sepal Length",
    title = "Petal Length versus Sepal Length",
    shape = "Species"
  )

# Variant 3: aesthetics declared in ggplot() and inherited by the geom.
ggplot(
  data = iris,
  aes(x = Petal.Length, y = Sepal.Length, shape = Species)
) +
  geom_point() +
  labs(
    x = "Petal Length",
    y = "Sepal Length",
    title = "Petal Length versus Sepal Length",
    shape = "Species"
  )

# Variant 4: positional data, legend title left to its default.
ggplot(iris, aes(x = Petal.Length, y = Sepal.Length, shape = Species)) +
  geom_point() +
  labs(
    x = "Petal Length",
    y = "Sepal Length",
    title = "Petal Length versus Sepal Length"
  )

# Scatter plot with the STROKE aesthetic.
# Stroke sets the border width, so it only affects shapes that have a border.
# Shape 21 is a bordered circle: `color` (mapped to Species) paints the border,
# `fill` paints the interior.
ggplot(iris, aes(x = Petal.Length, y = Sepal.Length)) +
  geom_point(
    aes(color = factor(Species)),
    size = 3,
    shape = 21,
    fill = "black",
    stroke = 2
  )

# Exercises
# Q1: What's gone wrong with this code? Why are the points not blue?
# (The call below is the exercise's deliberately wrong version.)
ggplot(data = mpg) +
  geom_point(
    mapping = aes(x = displ, y = hwy, color = "blue")
  )

# Answer: inside aes() the string "blue" is mapped as if it were data.
# To set a fixed colour, pass it to geom_point() outside aes().
ggplot(data = mpg) +
  geom_point(
    mapping = aes(x = displ, y = hwy),
    color = "blue"
  )

# Q2: Which variables in mpg are categorical? Which variables are continuous?
# (Hint: type ?mpg to read the documentation for the dataset.) How can you see
# this information when you run mpg?
# Answer: print the first rows. The row of type tags under the column names
# (<chr>, <dbl>, <int>) shows how each column is stored.
head(mpg)
## # A tibble: 6 x 11
##   manufacturer model displ  year   cyl trans  drv     cty   hwy fl    class
##   <chr>        <chr> <dbl> <int> <int> <chr>  <chr> <int> <int> <chr> <chr>
## 1 audi         a4      1.8  1999     4 auto(~ f        18    29 p     comp~
## 2 audi         a4      1.8  1999     4 manua~ f        21    29 p     comp~
## 3 audi         a4      2    2008     4 manua~ f        20    31 p     comp~
## 4 audi         a4      2    2008     4 auto(~ f        21    30 p     comp~
## 5 audi         a4      2.8  1999     6 auto(~ f        16    26 p     comp~
## 6 audi         a4      2.8  1999     6 manua~ f        18    26 p     comp~
### Categorical ----------------------------------------------------
## model, cyl, manufacturer, trans, drv, fl, class
# NOTE(review): cyl and year are both stored as <int>; treating cyl as
# categorical and year as continuous is a judgement call -- confirm.

### Continuous -----------------------------------------------------
## displ, year, cty, hwy
# Q3: Map a continuous variable to color, size, and shape. How do these
# aesthetics behave differently for categorical vs. continuous variables?

# Colour: works with a continuous variable.
ggplot(data = mpg) +
  geom_point(mapping = aes(x = displ, y = hwy, colour = cty))

# Size: works with a continuous variable.
ggplot(data = mpg) +
  geom_point(mapping = aes(x = displ, y = hwy, size = cty))

# Shape: fails with the error "A continuous variable can not be mapped to
# shape", so the call is left commented out.
#ggplot(data = mpg) + geom_point(mapping = aes(x = displ, y = hwy, shape = cty))
# Q4: What happens if you map the same variable to multiple aesthetics?
# Answer: both mappings are applied. Here each point's colour and size both
# encode cty, so the second aesthetic is redundant rather than ignored.
ggplot(data = mpg) +
  geom_point(
    mapping = aes(x = displ, y = hwy, colour = cty, size = cty)
  )

# Q5: What happens if you map an aesthetic to something other than a variable
# name, like aes(colour = displ < 5)? Note, you'll also need to specify x and y.
# Answer:
ggplot(data = mpg) +
  geom_point(
    mapping = aes(x = displ, y = hwy, colour = displ < 5)
  )

# ggplot evaluates displ < 5 on the fly into a logical (dummy) variable and
# maps its TRUE / FALSE values to the colour aesthetic.