ggplot2 basic

qplot() works much like the plot() function in the base graphics system. It looks for data in a data frame, similar to lattice, or in the parent environment. Plots are made up of aesthetics (size, shape, color) and geoms (points, lines). Factors are important for indicating subsets of the data; they should be labeled in an informative way.

library(ggplot2)
# Inspect the structure of the mpg data set used by the examples below.
str(mpg)
## tibble [234 x 11] (S3: tbl_df/tbl/data.frame)
##  $ manufacturer: chr [1:234] "audi" "audi" "audi" "audi" ...
##  $ model       : chr [1:234] "a4" "a4" "a4" "a4" ...
##  $ displ       : num [1:234] 1.8 1.8 2 2 2.8 2.8 3.1 1.8 1.8 2 ...
##  $ year        : int [1:234] 1999 1999 2008 2008 1999 1999 2008 1999 1999 2008 ...
##  $ cyl         : int [1:234] 4 4 4 4 6 6 6 4 4 4 ...
##  $ trans       : chr [1:234] "auto(l5)" "manual(m5)" "manual(m6)" "auto(av)" ...
##  $ drv         : chr [1:234] "f" "f" "f" "f" ...
##  $ cty         : int [1:234] 18 21 20 21 16 18 18 18 16 20 ...
##  $ hwy         : int [1:234] 29 29 31 30 26 26 27 26 25 28 ...
##  $ fl          : chr [1:234] "p" "p" "p" "p" ...
##  $ class       : chr [1:234] "compact" "compact" "compact" "compact" ...
library(ggplot2)

# Scatterplot of hwy against displ.
qplot(x = displ, y = hwy, data = mpg)

# Same scatterplot with point colour mapped to drv.
qplot(x = displ, y = hwy, data = mpg, color = drv)

# Points plus a smoother over all observations.
qplot(x = displ, y = hwy, data = mpg, geom = c("point", "smooth"))
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# Points plus one smoother per drv group (colour is mapped to drv).
qplot(
  x = displ, y = hwy, data = mpg,
  color = drv,
  geom = c("point", "smooth")
)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# With only x supplied, qplot() bins hwy; bars are filled by drv.
qplot(x = hwy, data = mpg, fill = drv)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# One scatterplot panel per drv value, laid out in columns.
qplot(x = displ, y = hwy, data = mpg, facets = . ~ drv)

# One binned panel per drv value, laid out in rows, with an explicit binwidth.
qplot(
  x = hwy, data = mpg,
  fill = drv,
  facets = drv ~ .,
  binwidth = 2
)

# Load the saved data file; the code below expects it to provide `maacs`.
# NOTE(review): absolute, machine-specific path -- adjust before running on
# another machine.
load(file = "C:/Users/angul/OneDrive/R/ExploreData/Data/maacs.Rda")
str(maacs)
## 'data.frame':    750 obs. of  5 variables:
##  $ id       : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ eno      : num  141 124 126 164 99 68 41 50 12 30 ...
##  $ duBedMusM: num  2423 2793 3055 775 1634 ...
##  $ pm25     : num  15.6 34.4 39 33.2 27.1 ...
##  $ mopos    : Factor w/ 2 levels "no","yes": 2 2 2 2 2 2 2 2 2 2 ...

# Binned distribution of log(eno).
qplot(x = log(eno), data = maacs)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 108 rows containing non-finite values (stat_bin).

# Same distribution with bars filled by mopos.
qplot(x = log(eno), data = maacs, fill = mopos)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 108 rows containing non-finite values (stat_bin).

# Density curve of log(eno).
qplot(x = log(eno), data = maacs, geom = "density")
## Warning: Removed 108 rows containing non-finite values (stat_density).

# One density curve per mopos level, distinguished by colour.
qplot(x = log(eno), data = maacs, geom = "density", color = mopos)
## Warning: Removed 108 rows containing non-finite values (stat_density).

# Scatterplot of log(eno) against log(pm25).
qplot(x = log(pm25), y = log(eno), data = maacs)
## Warning: Removed 184 rows containing missing values (geom_point).

# Same scatterplot with point shape mapped to mopos.
qplot(x = log(pm25), y = log(eno), data = maacs, shape = mopos)
## Warning: Removed 184 rows containing missing values (geom_point).

# Same scatterplot with point colour mapped to mopos.
qplot(x = log(pm25), y = log(eno), data = maacs, color = mopos)
## Warning: Removed 184 rows containing missing values (geom_point).

# Scatterplot with one linear-model fit per mopos level.
# The smoother is added as its own layer so that `method = "lm"` reaches only
# geom_smooth(): when `method` is passed through qplot() it is forwarded to
# every geom, and geom_point() rejects it with
# "Ignoring unknown parameters: method".
qplot(log(pm25), log(eno), data = maacs, color = mopos) +
  geom_smooth(method = "lm")
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 184 rows containing non-finite values (stat_smooth).
## Warning: Removed 184 rows containing missing values (geom_point).

# Same fit, shown in one panel per mopos level instead of by colour.
qplot(log(pm25), log(eno), data = maacs, facets = . ~ mopos) +
  geom_smooth(method = "lm")
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 184 rows containing non-finite values (stat_smooth).
## Warning: Removed 184 rows containing missing values (geom_point).

ggplot() is the core function:

# qplot() draws the points (its default geom when x and y are given); the
# linear-model smoother is added as a separate layer so that `method = "lm"`
# is not forwarded to geom_point(), which would warn
# "Ignoring unknown parameters: method".
qplot(x = log(pm25), y = eno, data = maacs, facets = . ~ mopos) +
  geom_smooth(method = "lm")
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 184 rows containing non-finite values (stat_smooth).
## Warning: Removed 184 rows containing missing values (geom_point).

# Disabled example: it refers to columns (NocturnalSympt, bmicat) that are not
# among the columns of maacs listed below.
#qplot(x = log(pm25), y = NocturnalSympt, data = maacs, facets = . ~ bmicat, geom = c("point", "smooth"), method = "lm")

# First rows of the data used by the ggplot() examples.
head(x = maacs)
##   id eno duBedMusM   pm25 mopos
## 1  1 141      2423 15.560   yes
## 2  2 124      2793 34.370   yes
## 3  3 126      3055 38.953   yes
## 4  4 164       775 33.249   yes
## 5  5  99      1634 27.060   yes
## 6  6  68       939 18.890   yes

# Build the plot object: data plus aesthetic mappings only, no layers yet.
g <- ggplot(
  data = maacs,
  mapping = aes(x = log(pm25), y = duBedMusM)
)

# summary() reports the data, the mappings and the faceting of the object.
summary(g)
## data: id, eno, duBedMusM, pm25, mopos [750x5]
## mapping:  x = ~log(pm25), y = ~duBedMusM
## faceting: <ggproto object: Class FacetNull, Facet, gg>
##     compute_layout: function
##     draw_back: function
##     draw_front: function
##     draw_labels: function
##     draw_panels: function
##     finish_data: function
##     init_scales: function
##     map_data: function
##     params: list
##     setup_data: function
##     setup_params: function
##     shrink: TRUE
##     train_scales: function
##     vars: function
##     super:  <ggproto object: Class FacetNull, Facet, gg>

No Plot Yet!

# Plot object with data and mappings only; it has no geom layer yet.
g <- ggplot(data = maacs, mapping = aes(x = log(pm25), y = duBedMusM))
# NOTE(review): the original notes recorded
#   print(g)  ->  Error in FUN(X[[i]], ...) : object 'logpm25' not found
# but no object named 'logpm25' appears in this code -- confirm whether that
# error still applies before relying on it.

# Adding a geom layer gives the object something to draw.
p <- g + geom_point()
print(p)
## Warning: Removed 237 rows containing missing values (geom_point).

# First plot: the same object, auto-printed with a point layer.

g <- ggplot(data = maacs, mapping = aes(x = log(pm25), y = duBedMusM))
g +
  geom_point()
## Warning: Removed 237 rows containing missing values (geom_point).

# Points plus the default smoother.
g +
  geom_point() +
  geom_smooth()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 237 rows containing non-finite values (stat_smooth).
## Warning: Removed 237 rows containing missing values (geom_point).

Including Plots

You can also embed plots, for example:

# Points plus a linear-model smoother.
g +
  geom_point() +
  geom_smooth(method = "lm")
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 237 rows containing non-finite values (stat_smooth).
## Warning: Removed 237 rows containing missing values (geom_point).

# Same plot split into one column panel per mopos level.
# The faceting formula is passed positionally: the `facets` argument of
# facet_grid() is deprecated in current ggplot2 releases.
g +
  geom_point() +
  facet_grid(. ~ mopos) +
  geom_smooth(method = "lm")
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 237 rows containing non-finite values (stat_smooth).
## Warning: Removed 237 rows containing missing values (geom_point).

# Constant aesthetics: every point gets the same colour, size and transparency.
g + geom_point(color = "steelblue", size = 4, alpha = 1/2)
## Warning: Removed 237 rows containing missing values (geom_point).

# Mapped aesthetic: colour follows the mopos column (inside aes()), while size
# and transparency stay constant.
g + geom_point(aes(color = mopos), size = 4, alpha = 1/2)
## Warning: Removed 237 rows containing missing values (geom_point).

# Axis labels and title. The y label names the column actually plotted:
# g maps y to duBedMusM, not to eno.
g +
  geom_point(aes(color = mopos), size = 4, alpha = 1/2) +
  labs(x = "Log of PM25", y = "duBedMusM", title = "MAACS")
## Warning: Removed 237 rows containing missing values (geom_point).

# Add a dotted (linetype = 3) linear-model smoother.
g +
  geom_point(aes(color = mopos), size = 4, alpha = 1/2) +
  labs(x = "Log of PM25", y = "duBedMusM", title = "MAACS") +
  geom_smooth(size = 2, linetype = 3, method = "lm")
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 237 rows containing non-finite values (stat_smooth).
## Warning: Removed 237 rows containing missing values (geom_point).

# Thicker smoother line, drawn with se = FALSE.
g +
  geom_point(aes(color = mopos), size = 4, alpha = 1/2) +
  labs(x = "Log of PM25", y = "duBedMusM", title = "MAACS") +
  geom_smooth(size = 4, linetype = 3, method = "lm", se = FALSE)
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 237 rows containing non-finite values (stat_smooth).
## Warning: Removed 237 rows containing missing values (geom_point).

# A qplot() result accepts further layers with `+`, just like ggplot().
qplot(x = displ, y = hwy, data = mpg, facets = . ~ drv) +
  geom_smooth()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

# Switch to the black-and-white theme and set the base font family.
g +
  geom_point(aes(color = mopos)) +
  theme_bw(base_family = "sans")
## Warning: Removed 237 rows containing missing values (geom_point).

# Simulated series: 100 standard-normal draws indexed by x.
testData <- data.frame(
  x = 1:100,
  y = rnorm(100)
)

head(testData)
##   x          y
## 1 1  1.6159300
## 2 2 -0.3520294
## 3 3 -0.9507735
## 4 4  0.9054679
## 5 5 -0.3517579
## 6 6  2.0920022

# Plant a single outlier at observation 50 (second column is y).
testData[50, 2] <- 100

# Base graphics: the y range is restricted to [-3, 3] when drawing the line.
plot(
  testData$x, testData$y,
  type = "l",
  ylim = c(-3, 3)
)

g <- ggplot(data = testData, mapping = aes(x = x, y = y))

# Default limits: the y axis stretches to include the outlier.
g +
  geom_line()

# Outlier missing
g +
  geom_line() +
  ylim(c(-3, 3))

# Outlier included
g +
  geom_line() +
  coord_cartesian(ylim = c(-3, 3))

# Quartile cut points of duBedMusM.
# quantile() selects its cut points through `probs`; it has no `breaks`
# argument, so a value passed under that name is ignored and the default
# quartiles are returned. The quartiles are requested explicitly here.
cutpoints <- quantile(
  x = maacs$duBedMusM,
  probs = seq(0, 1, by = 0.25),
  na.rm = TRUE
)

cutpoints
##        0%       25%       50%       75%      100% 
##      0.01    308.00   1151.00   3881.00 124919.00

# Bin duBedMusM into the four quartile groups.
# include.lowest = TRUE closes the first interval on the left; without it the
# minimum observation (equal to the first break) falls outside every interval
# and becomes NA.
maacs$newCol <- cut(
  x = maacs$duBedMusM,
  breaks = cutpoints,
  include.lowest = TRUE
)

levels(maacs$newCol)
## [1] "[0.01,308]"          "(308,1.15e+03]"      "(1.15e+03,3.88e+03]"
## [4] "(3.88e+03,1.25e+05]"
library(ggthemes)

# Base object for the final figure: eno against log(pm25).
g <- ggplot(
  data = maacs,
  mapping = aes(x = log(pm25), y = eno)
)

# Semi-transparent points, one panel per newCol/mopos combination, a
# linear-model line without its se band, and a plotmath x-axis label.
g +
  geom_point(alpha = 1/3) +
  facet_wrap(facets = newCol ~ mopos) +
  geom_smooth(method = "lm", se = FALSE, col = "steelblue") +
  theme_bw(base_size = 10) +
  labs(
    x = expression("log " * PM[2.5]),
    title = "MAACS"
  )
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 184 rows containing non-finite values (stat_smooth).
## Warning: Removed 184 rows containing missing values (geom_point).

Read the docs