Exploring the Grammar of Graphics

Charles Pierre

2017-07-25

Vis 1

This graphic is a traditional stacked bar chart. It is built from the mpg dataset, which ships with the ggplot2 library, so you can use it directly with ggplot(mpg, ...). There is one modification to the defaults in this graphic: I renamed the legend for clarity.

library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.3.3
library(dplyr)
## Warning: package 'dplyr' was built under R version 3.3.3
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Load the mpg dataset bundled with ggplot2 and preview its first six rows.
data("mpg", package = "ggplot2")
head(mpg, n = 6L)
## # A tibble: 6 x 11
##   manufacturer model displ  year   cyl      trans   drv   cty   hwy    fl
##          <chr> <chr> <dbl> <int> <int>      <chr> <chr> <int> <int> <chr>
## 1         audi    a4   1.8  1999     4   auto(l5)     f    18    29     p
## 2         audi    a4   1.8  1999     4 manual(m5)     f    21    29     p
## 3         audi    a4   2.0  2008     4 manual(m6)     f    20    31     p
## 4         audi    a4   2.0  2008     4   auto(av)     f    21    30     p
## 5         audi    a4   2.8  1999     6   auto(l5)     f    16    26     p
## 6         audi    a4   2.8  1999     6 manual(m5)     f    18    26     p
## # ... with 1 more variables: class <chr>
# Per-column summary of mpg (quantiles for numeric columns, length/class for
# character columns).
summary(object = mpg)
##  manufacturer          model               displ            year     
##  Length:234         Length:234         Min.   :1.600   Min.   :1999  
##  Class :character   Class :character   1st Qu.:2.400   1st Qu.:1999  
##  Mode  :character   Mode  :character   Median :3.300   Median :2004  
##                                        Mean   :3.472   Mean   :2004  
##                                        3rd Qu.:4.600   3rd Qu.:2008  
##                                        Max.   :7.000   Max.   :2008  
##       cyl           trans               drv                 cty       
##  Min.   :4.000   Length:234         Length:234         Min.   : 9.00  
##  1st Qu.:4.000   Class :character   Class :character   1st Qu.:14.00  
##  Median :6.000   Mode  :character   Mode  :character   Median :17.00  
##  Mean   :5.889                                         Mean   :16.86  
##  3rd Qu.:8.000                                         3rd Qu.:19.00  
##  Max.   :8.000                                         Max.   :35.00  
##       hwy             fl               class          
##  Min.   :12.00   Length:234         Length:234        
##  1st Qu.:18.00   Class :character   Class :character  
##  Median :24.00   Mode  :character   Mode  :character  
##  Mean   :23.44                                        
##  3rd Qu.:27.00                                        
##  Max.   :44.00
# Baseline bar chart: one bar per vehicle class, bar height = number of rows.
ggplot(mpg, aes(x = class)) +
  geom_bar()

# Stacked bar chart: vehicle class on the x axis, each bar split by
# transmission type (`trans`).
# Both the outline (color) and fill legends get the same readable title so
# ggplot2 merges them into a single legend instead of showing the raw column
# name "trans".
ggplot(data = mpg, mapping = aes(x = class)) +
  geom_bar(aes(color = trans, fill = trans)) +
  labs(color = "Transmission", fill = "Transmission")

Vis 2

Boxplot using the mpg dataset.

# Highway mileage per manufacturer, default axis order, classic theme.
ggplot(mpg, aes(x = manufacturer, y = hwy)) +
  geom_boxplot() +
  theme_classic()

# Same boxplot with explicit axis labels, flipped so manufacturers read
# down the vertical axis.
ggplot(mpg, aes(x = manufacturer, y = hwy)) +
  geom_boxplot() +
  theme_classic() +
  xlab("manufacturer") +
  ylab("hwy(miles/gallon)") +
  coord_flip()

# Flipped boxplot with manufacturers ordered by their median highway mileage.
ggplot(
  mpg,
  aes(
    x = reorder(manufacturer, hwy, FUN = median, na.rm = FALSE),
    y = hwy
  )
) +
  geom_boxplot() +
  xlab("manufacturer") +
  ylab("hwy(miles/gallon)") +
  coord_flip()

Vis 3

require(reshape2)
## Loading required package: reshape2
## Warning: package 'reshape2' was built under R version 3.3.3
library(ggthemes)
## Warning: package 'ggthemes' was built under R version 3.3.3
require(stringr)
## Loading required package: stringr
# `diamonds` is a dataset shipped with ggplot2, not a package, so it is loaded
# with data() instead of require() (which only warned that no such package
# exists and returned FALSE).
data("diamonds", package = "ggplot2")
summary(diamonds)
##      carat               cut        color        clarity     
##  Min.   :0.2000   Fair     : 1610   D: 6775   SI1    :13065  
##  1st Qu.:0.4000   Good     : 4906   E: 9797   VS2    :12258  
##  Median :0.7000   Very Good:12082   F: 9542   SI2    : 9194  
##  Mean   :0.7979   Premium  :13791   G:11292   VS1    : 8171  
##  3rd Qu.:1.0400   Ideal    :21551   H: 8304   VVS2   : 5066  
##  Max.   :5.0100                     I: 5422   VVS1   : 3655  
##                                     J: 2808   (Other): 2531  
##      depth           table           price             x         
##  Min.   :43.00   Min.   :43.00   Min.   :  326   Min.   : 0.000  
##  1st Qu.:61.00   1st Qu.:56.00   1st Qu.:  950   1st Qu.: 4.710  
##  Median :61.80   Median :57.00   Median : 2401   Median : 5.700  
##  Mean   :61.75   Mean   :57.46   Mean   : 3933   Mean   : 5.731  
##  3rd Qu.:62.50   3rd Qu.:59.00   3rd Qu.: 5324   3rd Qu.: 6.540  
##  Max.   :79.00   Max.   :95.00   Max.   :18823   Max.   :10.740  
##                                                                  
##        y                z         
##  Min.   : 0.000   Min.   : 0.000  
##  1st Qu.: 4.720   1st Qu.: 2.910  
##  Median : 5.710   Median : 3.530  
##  Mean   : 5.735   Mean   : 3.539  
##  3rd Qu.: 6.540   3rd Qu.: 4.040  
##  Max.   :58.900   Max.   :31.800  
## 
# Kernel density of diamond price, one curve per cut, Economist theme.
# geom_density() has no `binwidth` parameter (it was ignored with a warning),
# so it is dropped; use `bw` or `adjust` to control smoothing instead.
# The y aesthetic is left to the geom's default (the estimated density), which
# is what the explicit `..density..` mapping selected.
ggplot(data = diamonds, mapping = aes(x = price)) +
  geom_density(mapping = aes(color = cut, fill = cut)) +
  labs(x = "Diamond Price(USD)", y = "Density") +
  theme_economist() +
  ggtitle("Diamond Price Density")

Vis 4

# Preview the first six rows of the built-in iris dataset.
head(iris, n = 6L)
##   Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1          5.1         3.5          1.4         0.2  setosa
## 2          4.9         3.0          1.4         0.2  setosa
## 3          4.7         3.2          1.3         0.2  setosa
## 4          4.6         3.1          1.5         0.2  setosa
## 5          5.0         3.6          1.4         0.2  setosa
## 6          5.4         3.9          1.7         0.4  setosa
# Simple linear regression of petal length on sepal length.
# Columns are named bare and resolved through `data`, so the slope is labelled
# "Sepal.Length" (not "iris$Sepal.Length") and predict(fit, newdata = ...)
# evaluates against the new data frame instead of the original iris columns.
fit <- lm(Petal.Length ~ Sepal.Length, data = iris)
# Scatter plot of petal length against sepal length with a single linear
# trend line (and its default confidence band) over all observations.
ggplot(iris, aes(x = Sepal.Length, y = Petal.Length)) +
  geom_point() +
  geom_smooth(method = "lm") +
  labs(x = "Iris Sepal Length", y = "Iris Petal Length")

Vis 5

# `iris` is a built-in dataset (datasets package), not a package, so no
# require()/library() call is needed before using it.
summary(iris)
##   Sepal.Length    Sepal.Width     Petal.Length    Petal.Width   
##  Min.   :4.300   Min.   :2.000   Min.   :1.000   Min.   :0.100  
##  1st Qu.:5.100   1st Qu.:2.800   1st Qu.:1.600   1st Qu.:0.300  
##  Median :5.800   Median :3.000   Median :4.350   Median :1.300  
##  Mean   :5.843   Mean   :3.057   Mean   :3.758   Mean   :1.199  
##  3rd Qu.:6.400   3rd Qu.:3.300   3rd Qu.:5.100   3rd Qu.:1.800  
##  Max.   :7.900   Max.   :4.400   Max.   :6.900   Max.   :2.500  
##        Species  
##  setosa    :50  
##  versicolor:50  
##  virginica :50  
##                 
##                 
## 
# Scatter plot of petal length against sepal length, colored by species, with
# one linear trend line per species and no confidence band, in the Tufte theme.
# `se` is a logical switch, so it is passed FALSE rather than the number 0.
ggplot(
  data = iris,
  mapping = aes(x = Sepal.Length, y = Petal.Length, color = Species)
) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE) +
  xlab("Iris Sepal Length") +
  ylab("Iris Petal Length") +
  theme_tufte()