# Chapter: ggplot2 - Part 1: Scatter Plots
#The ggplot2 package is a simplified implementation of the grammar of graphics, written by Hadley Wickham for R.
#Dataset Used : IRIS data.frame
## Import Libraries
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.5.2
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 3.5.2
## -- Attaching packages -------------------------------- tidyverse 1.2.1 --
## v tibble 2.0.1 v purrr 0.2.5
## v tidyr 0.8.2 v dplyr 0.7.8
## v readr 1.3.1 v stringr 1.3.1
## v tibble 2.0.1 v forcats 0.3.0
## Warning: package 'tibble' was built under R version 3.5.2
## Warning: package 'tidyr' was built under R version 3.5.2
## Warning: package 'readr' was built under R version 3.5.2
## Warning: package 'purrr' was built under R version 3.5.2
## Warning: package 'dplyr' was built under R version 3.5.2
## Warning: package 'stringr' was built under R version 3.5.2
## Warning: package 'forcats' was built under R version 3.5.2
## -- Conflicts ----------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
# Simple scatter plot ----
# The four calls below draw the same figure (Sepal.Length against
# Petal.Length from iris); they differ only in where the data and the
# aesthetic mapping are supplied.

# Variant 1: data named in ggplot(), mapping named inside the geom
ggplot(data = iris) +
  geom_point(mapping = aes(x = Petal.Length, y = Sepal.Length)) +
  labs(
    title = "Petal Length versus Sepal Length",
    x = "Petal Length",
    y = "Sepal Length"
  )

# Variant 2: data and x passed by position
ggplot(iris) +
  geom_point(mapping = aes(Petal.Length, y = Sepal.Length)) +
  labs(
    title = "Petal Length versus Sepal Length",
    x = "Petal Length",
    y = "Sepal Length"
  )

# Variant 3: mapping declared once in ggplot() and inherited by the geom
ggplot(data = iris, aes(x = Petal.Length, y = Sepal.Length)) +
  geom_point() +
  labs(
    title = "Petal Length versus Sepal Length",
    x = "Petal Length",
    y = "Sepal Length"
  )

# Variant 4: as variant 3, with data and x passed by position
ggplot(iris, aes(Petal.Length, y = Sepal.Length)) +
  geom_point() +
  labs(
    title = "Petal Length versus Sepal Length",
    x = "Petal Length",
    y = "Sepal Length"
  )

# Simple scatter plot with a fixed point colour ----
# A constant colour is *set* by passing it to geom_point() outside aes().
# Anything placed inside aes() is *mapped* as data instead: a string such as
# "brown" would be treated as a one-level variable, drawn in the default
# palette colour and given a legend, rather than painting the points brown.

# Variant 1: data named in ggplot(), mapping in the geom, colour set on geom
ggplot(data = iris) +
  geom_point(
    mapping = aes(x = Petal.Length, y = Sepal.Length),
    color = "red"
  ) +
  labs(
    title = "Petal Length versus Sepal Length",
    x = "Petal Length",
    y = "Sepal Length"
  )

# Variant 2: data passed by position
ggplot(iris) +
  geom_point(
    mapping = aes(x = Petal.Length, y = Sepal.Length),
    color = "blue"
  ) +
  labs(
    title = "Petal Length versus Sepal Length",
    x = "Petal Length",
    y = "Sepal Length"
  )

# Variant 3: mapping declared in ggplot(), colour set on the geom
ggplot(data = iris, aes(x = Petal.Length, y = Sepal.Length)) +
  geom_point(color = "green") +
  labs(
    title = "Petal Length versus Sepal Length",
    x = "Petal Length",
    y = "Sepal Length"
  )

# Variant 4: `col` is an accepted alias for `color`; it is kept outside aes()
# so that the points really are drawn in brown.
ggplot(iris, aes(x = Petal.Length, y = Sepal.Length)) +
  geom_point(col = "brown") +
  labs(
    title = "Petal Length versus Sepal Length",
    x = "Petal Length",
    y = "Sepal Length"
  )

# Scatter plot with the COLOR aesthetic ----
# Species is mapped to colour inside aes(), so each species gets its own
# colour and a legend is drawn.

# Variant 1: data named in ggplot(), mapping in the geom
ggplot(data = iris) +
  geom_point(
    mapping = aes(x = Petal.Length, y = Sepal.Length, color = Species)
  ) +
  labs(
    title = "Petal Length versus Sepal Length",
    x = "Petal Length",
    y = "Sepal Length",
    color = "Species"
  )

# Variant 2: data passed by position
ggplot(iris) +
  geom_point(
    mapping = aes(x = Petal.Length, y = Sepal.Length, color = Species)
  ) +
  labs(
    title = "Petal Length versus Sepal Length",
    x = "Petal Length",
    y = "Sepal Length",
    color = "Species"
  )

# Variant 3: mapping declared in ggplot() and inherited by the geom
ggplot(data = iris, aes(x = Petal.Length, y = Sepal.Length, color = Species)) +
  geom_point() +
  labs(
    title = "Petal Length versus Sepal Length",
    x = "Petal Length",
    y = "Sepal Length",
    color = "Species"
  )

# Variant 4: `col` alias for the colour aesthetic; no legend title is
# passed to labs() here
ggplot(iris, aes(x = Petal.Length, y = Sepal.Length, col = Species)) +
  geom_point() +
  labs(
    title = "Petal Length versus Sepal Length",
    x = "Petal Length",
    y = "Sepal Length"
  )

# Scatter plot with the SIZE aesthetic ----
# Mapping a discrete variable to size is NOT advised; ggplot2 emits the
# warning recorded after each call.

# Variant 1: data named in ggplot(), mapping in the geom
ggplot(data = iris) +
  geom_point(
    mapping = aes(x = Petal.Length, y = Sepal.Length, size = Species)
  ) +
  labs(
    title = "Petal Length versus Sepal Length",
    x = "Petal Length",
    y = "Sepal Length",
    size = "Species"
  )
## Warning: Using size for a discrete variable is not advised.

# Variant 2: data passed by position
ggplot(iris) +
  geom_point(
    mapping = aes(x = Petal.Length, y = Sepal.Length, size = Species)
  ) +
  labs(
    title = "Petal Length versus Sepal Length",
    x = "Petal Length",
    y = "Sepal Length",
    size = "Species"
  )
## Warning: Using size for a discrete variable is not advised.

# Variant 3: mapping declared in ggplot() and inherited by the geom
ggplot(data = iris, aes(x = Petal.Length, y = Sepal.Length, size = Species)) +
  geom_point() +
  labs(
    title = "Petal Length versus Sepal Length",
    x = "Petal Length",
    y = "Sepal Length",
    size = "Species"
  )
## Warning: Using size for a discrete variable is not advised.

# Variant 4: data passed by position; no legend title is passed to labs()
ggplot(iris, aes(x = Petal.Length, y = Sepal.Length, size = Species)) +
  geom_point() +
  labs(
    title = "Petal Length versus Sepal Length",
    x = "Petal Length",
    y = "Sepal Length"
  )
## Warning: Using size for a discrete variable is not advised.

# Scatter plot with the ALPHA aesthetic ----
# Alpha controls point transparency. Mapping a discrete variable to alpha is
# not advised; ggplot2 emits the warning recorded after each call.

# Variant 1: data named in ggplot(), mapping in the geom
ggplot(data = iris) +
  geom_point(
    mapping = aes(x = Petal.Length, y = Sepal.Length, alpha = Species)
  ) +
  labs(
    title = "Petal Length versus Sepal Length",
    x = "Petal Length",
    y = "Sepal Length",
    alpha = "Species"
  )
## Warning: Using alpha for a discrete variable is not advised.

# Variant 2: data passed by position
ggplot(iris) +
  geom_point(
    mapping = aes(x = Petal.Length, y = Sepal.Length, alpha = Species)
  ) +
  labs(
    title = "Petal Length versus Sepal Length",
    x = "Petal Length",
    y = "Sepal Length",
    alpha = "Species"
  )
## Warning: Using alpha for a discrete variable is not advised.

# Variant 3: mapping declared in ggplot() and inherited by the geom.
# The legend-title argument must be spelled `alpha` to match the aesthetic;
# a misspelled name does not label the alpha legend.
ggplot(data = iris, aes(x = Petal.Length, y = Sepal.Length, alpha = Species)) +
  geom_point() +
  labs(
    title = "Petal Length versus Sepal Length",
    x = "Petal Length",
    y = "Sepal Length",
    alpha = "Species"
  )
## Warning: Using alpha for a discrete variable is not advised.

# Variant 4: data passed by position; no legend title is passed to labs()
ggplot(iris, aes(x = Petal.Length, y = Sepal.Length, alpha = Species)) +
  geom_point() +
  labs(
    title = "Petal Length versus Sepal Length",
    x = "Petal Length",
    y = "Sepal Length"
  )
## Warning: Using alpha for a discrete variable is not advised.

# Scatter plot with the SHAPE aesthetic ----
# Shape accepts at most six discrete values: if the mapped variable has more
# than six levels, the excess levels are not plotted.
# A continuous variable cannot be mapped to shape.

# Variant 1: data named in ggplot(), mapping in the geom
ggplot(data = iris) +
  geom_point(
    mapping = aes(x = Petal.Length, y = Sepal.Length, shape = Species)
  ) +
  labs(
    title = "Petal Length versus Sepal Length",
    x = "Petal Length",
    y = "Sepal Length",
    shape = "Species"
  )

# Variant 2: data passed by position
ggplot(iris) +
  geom_point(
    mapping = aes(x = Petal.Length, y = Sepal.Length, shape = Species)
  ) +
  labs(
    title = "Petal Length versus Sepal Length",
    x = "Petal Length",
    y = "Sepal Length",
    shape = "Species"
  )

# Variant 3: mapping declared in ggplot() and inherited by the geom
ggplot(data = iris, aes(x = Petal.Length, y = Sepal.Length, shape = Species)) +
  geom_point() +
  labs(
    title = "Petal Length versus Sepal Length",
    x = "Petal Length",
    y = "Sepal Length",
    shape = "Species"
  )

# Variant 4: data passed by position; no legend title is passed to labs()
ggplot(iris, aes(x = Petal.Length, y = Sepal.Length, shape = Species)) +
  geom_point() +
  labs(
    title = "Petal Length versus Sepal Length",
    x = "Petal Length",
    y = "Sepal Length"
  )

# Scatter plot with the STROKE aesthetic ----
# Stroke sets the border width of a point. Only shapes that have a border
# (such as shape 21 used here) are affected by it.
ggplot(iris, aes(x = Petal.Length, y = Sepal.Length)) +
  geom_point(
    aes(color = factor(Species)),
    shape = 21,
    size = 3,
    stroke = 2,
    fill = "black"
  )

# Exercises ----
# Q1: What's gone wrong with this code? Why are the points not blue?
# (The call below is the exercise's deliberately faulty code: the colour
# string sits inside aes().)
ggplot(data = mpg) +
  geom_point(
    mapping = aes(x = displ, y = hwy, color = "blue")
  )

# Answer: pass the colour to geom_point() outside aes().
ggplot(data = mpg) +
  geom_point(
    mapping = aes(x = displ, y = hwy),
    color = "blue"
  )

# Q2: Which variables in mpg are categorical? Which variables are continuous?
# (Hint: type ?mpg to read the documentation for the dataset.) How can you
# see this information when you run mpg?
# Answer: printing the tibble shows each column's type under its name.
head(mpg, n = 6L)
## # A tibble: 6 x 11
## manufacturer model displ year cyl trans drv cty hwy fl class
## <chr> <chr> <dbl> <int> <int> <chr> <chr> <int> <int> <chr> <chr>
## 1 audi a4 1.8 1999 4 auto(~ f 18 29 p comp~
## 2 audi a4 1.8 1999 4 manua~ f 21 29 p comp~
## 3 audi a4 2 2008 4 manua~ f 20 31 p comp~
## 4 audi a4 2 2008 4 auto(~ f 21 30 p comp~
## 5 audi a4 2.8 1999 6 auto(~ f 16 26 p comp~
## 6 audi a4 2.8 1999 6 manua~ f 18 26 p comp~
### Categorical ----------------------------------------------------
## model, cyl, manufacturer, trans, drv, fl, class
### Continuous -----------------------------------------------------
## displ, year, cty, hwy
# Q3: Map a continuous variable to color, size, and shape. How do these
# aesthetics behave differently for categorical vs. continuous variables?

# Color: it works.
ggplot(data = mpg) +
  geom_point(mapping = aes(x = displ, y = hwy, colour = cty))

# Size: it works.
ggplot(data = mpg) +
  geom_point(mapping = aes(x = displ, y = hwy, size = cty))

# Shape: error - a continuous variable can not be mapped to shape, so the
# call is left commented out.
#ggplot(data = mpg) + geom_point(mapping = aes(x = displ, y = hwy, shape = cty))
# Q4: What happens if you map the same variable to multiple aesthetics?
# Answer: both mappings are applied. Here cty drives the colour and the size
# of every point, so the two aesthetics encode the same information
# redundantly.
ggplot(data = mpg) +
  geom_point(mapping = aes(x = displ, y = hwy, colour = cty, size = cty))

# Q5: What happens if you map an aesthetic to something other than a variable
# name, like aes(colour = displ < 5)? Note, you'll also need to specify x
# and y.
# Answer:
ggplot(data = mpg) +
  geom_point(
    mapping = aes(x = displ, y = hwy, colour = displ < 5)
  )

# ggplot evaluates displ < 5 on the fly into a logical (dummy) variable and
# maps its TRUE / FALSE values to the colour aesthetic.