Library ggplot2

ggplot2 is a plotting system for R, based on the grammar of graphics, which tries to take the good parts of base and lattice graphics and none of the bad parts. It takes care of many of the fiddly details that make plotting a hassle (like drawing legends) as well as providing a powerful model of graphics that makes it easy to produce complex multi-layered graphics.

geom_point(): color gradient with continuous data

A color gradient is a very powerful ggplot2 tool for showing a third variable in a 2D plot. Here is an example using the ‘mtcars’ dataset.

# Scatter plot of weight (wt) against fuel economy (mpg) for mtcars, with
# the `disp` column mapped to point colour.
data("mtcars")
ggplot(
  data = mtcars,
  mapping = aes(x = wt, y = mpg, colour = disp)
) +
  geom_point()

geom_point() : color with categorical data

# Price against carat for the diamonds data, with `clarity` mapped to point
# colour; points are drawn semi-transparent (alpha = 0.4).
data("diamonds")
ggplot(
  data = diamonds,
  mapping = aes(x = carat, y = price, colour = clarity)
) +
  geom_point(alpha = 0.4)

geom_smooth()

# Base scatter plot of price against carat with semi-transparent points
# (alpha = 0.2), kept in `dia_plot` so further layers can be added to it.
dia_plot <- ggplot(data = diamonds, mapping = aes(x = carat, y = price)) +
  geom_point(alpha = 0.2)

# Add a smoothed trend line on top; se = FALSE switches off the
# confidence band around it
dia_plot + geom_smooth(se = FALSE)

Geom_smooth and color

# Rebuild the base scatter plot (price against carat, alpha = 0.2), then add
# a smoother whose colour is mapped to `clarity`. The mapping is given only
# to geom_smooth(), so the points themselves are not coloured by clarity.
dia_plot <- ggplot(data = diamonds, mapping = aes(x = carat, y = price)) +
  geom_point(alpha = 0.2)
dia_plot + geom_smooth(mapping = aes(colour = clarity), se = FALSE)

Noise and transparency to show density

# Read the vocabulary data from "Vocab.csv" (path is relative to the
# current working directory)
Vocab <- read.csv("Vocab.csv")

# Jittered, semi-transparent points (alpha = 0.2), with the un-jittered
# observations drawn on top as blue-outlined, white-filled circles (shape 21)
ggplot(data = Vocab, mapping = aes(x = education, y = vocabulary)) +
  geom_jitter(alpha = 0.2) +
  geom_point(shape = 21, colour = "blue", fill = "white")

Faceting

# Jittered scatter plot of vocabulary against education, split into
# side-by-side panels by the `sex` column
ggplot(data = Vocab, mapping = aes(x = education, y = vocabulary)) +
  geom_jitter(alpha = 0.2) +
  facet_grid(. ~ sex)

Faceting 2

data(iris)

# Give every flower a unique id so that its measurements can be matched up
# again after reshaping. seq_len() is used instead of 1:nrow() because it
# stays correct (empty) for a zero-row data frame.
iris$Flower <- seq_len(nrow(iris))

# Build iris.wide: one row per flower and Part, with Length and Width as
# separate columns. The measurement column names have the form
# "<Part>.<Measure>" (e.g. "Sepal.Length"); names_sep splits them on the
# literal dot. pivot_longer()/pivot_wider() replace the superseded
# gather()/separate()/spread() chain.
iris.wide <- iris %>%
  pivot_longer(
    cols = -c(Species, Flower),
    names_to = c("Part", "Measure"),
    names_sep = "\\.",
    values_to = "value"
  ) %>%
  pivot_wider(names_from = Measure, values_from = value)

# Width against Length, coloured by Part, with one panel per Species
ggplot(iris.wide, aes(x = Length, y = Width, col = Part)) +
  geom_point() +
  facet_wrap(~ Species)

Bar plot

# Bar chart with one bar per clarity grade; the `cut` column is mapped to
# the fill colour of the bar segments
ggplot(data = diamonds, mapping = aes(x = clarity, fill = cut)) +
  geom_bar()

Bar plot with a factor variable as a gradient color scale

# Definition of a set of blue colors: the 9-colour "Blues" palette
blues <- brewer.pal(9, "Blues")

# colorRampPalette() returns a function; blue_range(n) interpolates n
# colours across the set of blues
blue_range <- colorRampPalette(blues)

# Store the vocabulary score as a factor so it can be used as a discrete
# fill variable
Vocab <- Vocab %>%
  mutate(voc_factor = as.factor(vocabulary))

# Bars per education level, stacked to a common height (position = "fill")
# and filled by vocabulary score. The number of colours is taken from the
# number of factor levels rather than hard-coded, because
# scale_fill_manual() fails when it is given fewer values than levels.
ggplot(Vocab, aes(x = education, fill = voc_factor)) +
  geom_bar(position = "fill") +
  scale_fill_manual(
    "vocabulary",
    values = blue_range(nlevels(Vocab$voc_factor))
  )

Dot plot as an upgraded bar plot

# Jittered, semi-transparent points of carat for each clarity grade, with
# `price` mapped to point colour. geom_jitter() is the shortcut for
# geom_point(position = "jitter").
ggplot(
  data = diamonds,
  mapping = aes(x = clarity, y = carat, colour = price)
) +
  geom_jitter(alpha = 0.5)

Pie plot

# thin.bar: a single narrow bar (every row placed at x = 1, width = 0.1),
# filled by the number of cylinders; the fill legend is titled "Cylinder"
thin.bar <- ggplot(data = mtcars, mapping = aes(x = 1, fill = factor(cyl))) +
  geom_bar(width = 0.1) +
  scale_fill_discrete(name = "Cylinder")

# Switch to polar coordinates with y mapped to the angle, which turns the
# bar into a pie-style chart
thin.bar + coord_polar(theta = "y")

Adding periods with geom_rect()

# Load the economics data set
data(economics)

# Begin and end dates of the periods to highlight, entered as
# "YYYY-MM-DD" strings
recess <- data.frame(
  begin = c(
    "1969-12-01", "1973-11-01", "1980-01-01",
    "1981-07-01", "1990-07-01", "2001-03-01"
  ),
  end = c(
    "1970-11-01", "1975-03-01", "1980-07-01",
    "1982-11-01", "1991-03-01", "2001-11-01"
  ),
  stringsAsFactors = FALSE
)

library(lubridate)
# Parse the strings into dates so they can be placed on the same x axis as
# the `date` column of economics
recess$begin <- ymd(recess$begin)
recess$end <- ymd(recess$end)

# Line plot of unemploy / pop over time, with one translucent red rectangle
# per recess period.
# - inherit.aes = FALSE: the rectangles come from `recess`, which has no
#   date / unemploy / pop columns, so the plot-level mapping must not apply.
# - ymin = -Inf, ymax = Inf: the rectangles span the full panel height
#   without stretching the y axis beyond the range of the line.
ggplot(economics, aes(x = date, y = unemploy / pop)) +
  geom_line() +
  geom_rect(
    data = recess,
    mapping = aes(xmin = begin, xmax = end, ymin = -Inf, ymax = Inf),
    inherit.aes = FALSE,
    fill = "red",
    alpha = 0.2
  )

Box plot: life expectancy per continent

data("gapminder")

# Create dataset of 2007 data. The year is compared against the number 2007
# rather than the string "2007", which only matched through implicit
# coercion of the column to character.
gap2007 <- filter(gapminder, year == 2007)

# Compute groupwise mean and median lifeExp for each continent
gap2007 %>%
  group_by(continent) %>%
  summarize(
    mean(lifeExp),
    median(lifeExp)
  )
## # A tibble: 5 × 3
##   continent `mean(lifeExp)` `median(lifeExp)`
##      <fctr>           <dbl>             <dbl>
## 1    Africa        54.80604           52.9265
## 2  Americas        73.60812           72.8990
## 3      Asia        70.72848           72.3960
## 4    Europe        77.64860           78.6085
## 5   Oceania        80.71950           80.7195

# Generate box plots of lifeExp for each continent
gap2007 %>%
  ggplot(aes(x = continent, y = lifeExp)) +
  geom_boxplot()

Density plot

# Per-continent measures of spread for lifeExp: standard deviation,
# interquartile range, and the number of rows in each group
gap2007 %>%
  group_by(continent) %>%
  summarise(
    sd(lifeExp),
    IQR(lifeExp),
    n()
  )
## # A tibble: 5 × 4
##   continent `sd(lifeExp)` `IQR(lifeExp)` `n()`
##      <fctr>         <dbl>          <dbl> <int>
## 1    Africa     9.6307807       11.61025    52
## 2  Americas     4.4409476        4.63200    25
## 3      Asia     7.9637245       10.15200    33
## 4    Europe     2.9798127        4.78250    30
## 5   Oceania     0.7290271        0.51550     2

# Overlaid density curves of lifeExp, filled by continent and drawn
# semi-transparent (alpha = 0.3) so the curves stay visible where they overlap
ggplot(data = gap2007, mapping = aes(x = lifeExp, fill = continent)) +
  geom_density(alpha = 0.3)