Exploring the Grammar of Graphics

Data Preparation

library(dplyr)
## Warning: package 'dplyr' was built under R version 3.2.5
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Workspace reset (`rm(list = ls())`) deliberately dropped: it deletes every
# object in the caller's global environment when this file is sourced or
# rendered inside an existing session, yet it does not detach packages or
# restore options, so it never yields a truly clean session. Restart R (or
# knit in a fresh session) when a clean slate is needed.
## Warning: package 'ggplot2' was built under R version 3.2.5
## Classes 'tbl_df', 'tbl' and 'data.frame':    234 obs. of  11 variables:
##  $ manufacturer: chr  "audi" "audi" "audi" "audi" ...
##  $ model       : chr  "a4" "a4" "a4" "a4" ...
##  $ displ       : num  1.8 1.8 2 2 2.8 2.8 3.1 1.8 1.8 2 ...
##  $ year        : int  1999 1999 2008 2008 1999 1999 2008 1999 1999 2008 ...
##  $ cyl         : int  4 4 4 4 6 6 6 4 4 4 ...
##  $ trans       : chr  "auto(l5)" "manual(m5)" "manual(m6)" "auto(av)" ...
##  $ drv         : chr  "f" "f" "f" "f" ...
##  $ cty         : int  18 21 20 21 16 18 18 18 16 20 ...
##  $ hwy         : int  29 29 31 30 26 26 27 26 25 28 ...
##  $ fl          : chr  "p" "p" "p" "p" ...
##  $ class       : chr  "compact" "compact" "compact" "compact" ...
# Open the reference page for the mpg data set.
utils::help(topic = "mpg")
## starting httpd help server ... done
# Vis 1
# Stacked bar chart: rows of df counted per class, each bar split by the
# Transmission column.
# NOTE(review): df and its Transmission column are not created in the visible
# code -- presumably built in a hidden chunk; confirm.
p <- ggplot(data = df, mapping = aes(x = class)) +
  geom_bar(mapping = aes(fill = Transmission), position = "stack")
p

# Open the reference page for geom_bar.
utils::help(topic = "geom_bar")
# Vis 2
# Box plots of highway fuel efficiency (hwy), one box per manufacturer.
df2 <- mpg
p2 <- ggplot(data = df2, aes(manufacturer, hwy)) +
  geom_boxplot(aes(group = manufacturer)) +
  # Flip the coordinates so the manufacturer names run down the vertical axis;
  # xlab()/ylab() still label the x and y aesthetics respectively.
  coord_flip() +
  xlab("Vehicle Manufacturer") +
  # Label fixed: the closing parenthesis was missing.
  ylab("Highway Fuel Efficiency (miles/gallon)")
p2

# Vis 3
# Load the diamonds data set, keep a working copy in df3 and print its
# structure.
data(list = "diamonds")
library(ggthemes)
## Warning: package 'ggthemes' was built under R version 3.2.5
df3 <- diamonds
utils::str(object = df3)
## Classes 'tbl_df', 'tbl' and 'data.frame':    53940 obs. of  10 variables:
##  $ carat  : num  0.23 0.21 0.23 0.29 0.31 0.24 0.24 0.26 0.22 0.23 ...
##  $ cut    : Ord.factor w/ 5 levels "Fair"<"Good"<..: 5 4 2 4 2 3 3 3 1 3 ...
##  $ color  : Ord.factor w/ 7 levels "D"<"E"<"F"<"G"<..: 2 2 2 6 7 7 6 5 2 5 ...
##  $ clarity: Ord.factor w/ 8 levels "I1"<"SI2"<"SI1"<..: 2 3 5 4 2 6 7 3 4 5 ...
##  $ depth  : num  61.5 59.8 56.9 62.4 63.3 62.8 62.3 61.9 65.1 59.4 ...
##  $ table  : num  55 61 65 58 58 57 57 55 61 61 ...
##  $ price  : int  326 326 327 334 335 336 336 337 337 338 ...
##  $ x      : num  3.95 3.89 4.05 4.2 4.34 3.94 3.95 4.07 3.87 4 ...
##  $ y      : num  3.98 3.84 4.07 4.23 4.35 3.96 3.98 4.11 3.78 4.05 ...
##  $ z      : num  2.43 2.31 2.31 2.63 2.75 2.48 2.47 2.53 2.49 2.39 ...
# Density curves of diamond price, one filled curve per cut, with the legend
# placed above the plot.
p3 <- ggplot(data = df3, mapping = aes(x = price, fill = cut)) +
  geom_density() +
  xlab("Diamond Price(USD)") +
  theme(legend.position = "top") +
  ggtitle("Diamond Price Density")
p3

# Open the reference page for ggthemes.
utils::help(topic = "ggthemes")
# Vis 4
# Keep a working copy of the built-in iris data in df4 and print a summary
# of each column.
library(datasets)
df4 <- iris
summary(object = df4)
##   Sepal.Length    Sepal.Width     Petal.Length    Petal.Width   
##  Min.   :4.300   Min.   :2.000   Min.   :1.000   Min.   :0.100  
##  1st Qu.:5.100   1st Qu.:2.800   1st Qu.:1.600   1st Qu.:0.300  
##  Median :5.800   Median :3.000   Median :4.350   Median :1.300  
##  Mean   :5.843   Mean   :3.057   Mean   :3.758   Mean   :1.199  
##  3rd Qu.:6.400   3rd Qu.:3.300   3rd Qu.:5.100   3rd Qu.:1.800  
##  Max.   :7.900   Max.   :4.400   Max.   :6.900   Max.   :2.500  
##        Species  
##  setosa    :50  
##  versicolor:50  
##  virginica :50  
##                 
##                 
## 
# Scatter plot of petal length against sepal length for every row of df4,
# overlaid with a linear-model fit (geom_smooth draws its confidence band by
# default).
p4 <- ggplot(data = df4, aes(Sepal.Length, Petal.Length)) +
  geom_point() +
  geom_smooth(method = "lm") +
  # Label fixed: was misspelled "Irs Sepal Length".
  xlab("Iris Sepal Length") +
  ylab("Iris Petal Length") +
  ggtitle("Relationship Between Petal and Sepal Length")
p4

# Vis 5
# Scatter plot of iris petal length against sepal length with points coloured
# by species, a black-and-white theme, one linear fit without a confidence
# band, and the legend below the plot.
#
# The layers are added to p5's own base plot. Previously they were added to
# p4, which already carried an uncoloured point layer and a smoother with a
# confidence band, so both were drawn underneath this plot's layers.
p5 <- ggplot(data = df4, aes(Sepal.Length, Petal.Length)) +
  # Colour is mapped only in this layer, so the smoother below is not split
  # by species and yields a single line.
  geom_point(aes(color = Species), size = 3) +
  # Label fixed: was misspelled "Irs Sepal Length".
  xlab("Iris Sepal Length") +
  ylab("Iris Petal Length") +
  ggtitle("Relationship Between Petal and Sepal Length") +
  theme_bw() +
  geom_smooth(method = "lm", se = FALSE) +
  # Must come after theme_bw(): a complete theme added later would reset the
  # legend position.
  theme(legend.position = "bottom")
p5

Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.