Library ggplot2
ggplot2 is a plotting system for R, based on the grammar of graphics, which tries to take the good parts of base and lattice graphics and none of the bad parts. It takes care of many of the fiddly details that make plotting a hassle (like drawing legends) as well as providing a powerful model of graphics that makes it easy to produce complex multi-layered graphics.
geom_point(): color gradient with continuous data
A color gradient is a very powerful ggplot2 tool for showing a third variable in a 2D plot. Here is an example using the ‘mtcars’ dataset.
# Load the mtcars dataset.
data("mtcars")

# Scatter plot of mpg against wt. Mapping the continuous variable `disp` to
# color draws the points on a color gradient, showing a third variable.
ggplot(mtcars, aes(x = wt, y = mpg, color = disp)) +
  geom_point()

# geom_point(): color with categorical data ----
# Load the diamonds dataset.
data("diamonds")

# Scatter plot of price against carat. `clarity` is mapped to color, and
# alpha = 0.4 makes the points semi-transparent.
ggplot(diamonds, aes(x = carat, y = price, color = clarity)) +
  geom_point(alpha = 0.4)

# geom_smooth() ----
# Base plot: price against carat, drawn with semi-transparent points.
dia_plot <- ggplot(diamonds, aes(x = carat, y = price))
dia_plot <- dia_plot + geom_point(alpha = 0.2)

# Plot dia_plot with additional geom_smooth() with se set to FALSE
dia_plot +
  geom_smooth(se = FALSE)

# geom_smooth() and color ----
# Rebuild the base plot, then map `clarity` to color inside geom_smooth()
# only: the smooth layer is colored by clarity while the point layer keeps
# its default color.
dia_plot <- ggplot(diamonds, aes(x = carat, y = price))
dia_plot <- dia_plot + geom_point(alpha = 0.2)
dia_plot +
  geom_smooth(aes(color = clarity), se = FALSE)

# Noise and transparency to show density ----
# Read the Vocab data; the relative path is resolved against the current
# working directory.
Vocab <- read.csv("Vocab.csv")

# Plot 1: Jittered scatter plot
# geom_jitter() draws the observations with random noise and alpha = 0.2;
# geom_point() then overlays the un-jittered positions as shape 21 circles
# with a blue outline and white fill.
ggplot(Vocab, aes(x = education, y = vocabulary)) +
  geom_jitter(alpha = 0.2) +
  geom_point(fill = "white", color = "blue", shape = 21)

# Faceting ----
# Jittered scatter plot, split into one panel per value of `sex`.
ggplot(Vocab, aes(x = education, y = vocabulary)) +
  geom_jitter(alpha = 0.2) +
  facet_grid(~ sex)

# Faceting 2 ----
data(iris)
# Creating the iris.wide data
# Add a column with unique ids, one per row. seq_len() is used instead of
# 1:nrow() so that a zero-row data frame would yield an empty id vector
# rather than c(1, 0).
iris$Flower <- seq_len(nrow(iris))
# Create iris.wide:
#   1. gather() stacks every column except Species and Flower into
#      key/value pairs;
#   2. separate() splits each key on the literal dot into Part and Measure;
#   3. spread() turns the Measure values back into columns, which the plot
#      below reads as Length and Width.
# NOTE(review): gather()/spread() are superseded by pivot_longer()/
# pivot_wider() in tidyr >= 1.0; kept here because the tidyr version in use
# is not visible in this file.
iris.wide <- iris %>%
  gather(key, value, -Species, -Flower) %>%
  separate(key, c("Part", "Measure"), sep = "\\.") %>%
  spread(Measure, value)

# Width against Length, colored by Part, with one panel per Species.
ggplot(iris.wide, aes(x = Length, y = Width, color = Part)) +
  geom_point() +
  facet_wrap(~ Species)

# Bar plot ----
# Bar chart of the number of diamonds per clarity level; mapping `cut` to
# fill splits each bar by cut.
ggplot(diamonds, aes(x = clarity, fill = cut)) +
  geom_bar()

# Bar with factor variable as a gradient color scale ----
# Definition of a set of blue colors
# NOTE(review): brewer.pal() comes from RColorBrewer; no library() call for
# it is visible in this file - confirm it is attached.
blues <- brewer.pal(9, "Blues")

# Make a color range using colorRampPalette() and the set of blues.
# blue_range is a function: blue_range(n) returns n interpolated colors.
blue_range <- colorRampPalette(blues)

# Store vocabulary as a factor so it can be used as a discrete fill.
Vocab <- Vocab %>%
  mutate(voc_factor = as.factor(vocabulary))

# Use blue_range to adjust the color of the bars, use scale_fill_manual().
# position = "fill" stacks the bars and scales each one to the same height.
# NOTE(review): 11 colors are requested - presumably one per level of
# voc_factor; verify against the data.
ggplot(Vocab, aes(x = education, fill = voc_factor)) +
  geom_bar(position = "fill") +
  scale_fill_manual("vocabulary", values = blue_range(11))

# Dot plot as an upgraded bar plot ----
# Jittered, semi-transparent points of carat for each clarity level, with
# price mapped to color.
ggplot(diamonds, aes(x = clarity, y = carat, color = price)) +
  geom_point(alpha = 0.5, position = "jitter")

# Pie plot ----
# Create stacked bar plot: thin.bar
# Every row is placed at x = 1, so there is a single bar, filled by the
# number of cylinders; the fill legend is titled "Cylinder".
thin.bar <- ggplot(mtcars, aes(x = 1, fill = factor(cyl))) +
  geom_bar(width = 0.1) +
  scale_fill_discrete("Cylinder")

# Convert thin.bar to pie chart by mapping the y axis to the angle.
thin.bar + coord_polar(theta = "y")

# Adding periods with geom_rect() ----
# Load the economics dataset
data("economics")
# Begin and end of each period to highlight, one row per period.
# The dates are kept as character strings (not factors) at this point.
# TRUE/FALSE are spelled out because T and F are ordinary variables that
# can be reassigned.
recess <- data.frame(
  begin = c(
    "1969-12-01", "1973-11-01", "1980-01-01",
    "1981-07-01", "1990-07-01", "2001-03-01"
  ),
  end = c(
    "1970-11-01", "1975-03-01", "1980-07-01",
    "1982-11-01", "1991-03-01", "2001-11-01"
  ),
  stringsAsFactors = FALSE
)
library(lubridate)

# Parse the "YYYY-MM-DD" strings into dates so they can be placed on the
# date x axis of the plot below.
recess$begin <- ymd(recess$begin)
recess$end <- ymd(recess$end)

# Expand the following command with geom_rect() to draw the recess periods
# - inherit.aes = FALSE: the rectangles come from `recess`, which has no
#   date/unemploy/pop columns, so the plot-level mapping must not apply.
# - ymin/ymax = -Inf/Inf: the rectangles span the full panel height without
#   widening the y axis beyond the range of the plotted line.
ggplot(economics, aes(x = date, y = unemploy / pop)) +
  geom_line() +
  geom_rect(
    data = recess,
    aes(xmin = begin, xmax = end, ymin = -Inf, ymax = Inf),
    inherit.aes = FALSE,
    fill = "red",
    alpha = 0.2
  )

# Box plot: life expectancy per continent ----
# NOTE(review): the gapminder dataset comes from the gapminder package; no
# library() call for it is visible in this file - confirm it is attached.
data("gapminder")

# Create dataset of 2007 data
# Compare against the number 2007 rather than the string "2007", so the
# year column is not coerced to character for the comparison.
gap2007 <- filter(gapminder, year == 2007)
# Mean and median of lifeExp within each continent
gap2007 %>%
  group_by(continent) %>%
  summarize(
    mean(lifeExp),
    median(lifeExp)
  )
## # A tibble: 5 × 3
##   continent `mean(lifeExp)` `median(lifeExp)`
##      <fctr>           <dbl>             <dbl>
## 1    Africa        54.80604           52.9265
## 2  Americas        73.60812           72.8990
## 3      Asia        70.72848           72.3960
## 4    Europe        77.64860           78.6085
## 5   Oceania        80.71950           80.7195
# Generate box plots of lifeExp for each continent
gap2007 %>%
  ggplot(aes(x = continent, y = lifeExp)) +
  geom_boxplot()

# Density plot ----
# Spread of lifeExp within each continent: standard deviation,
# interquartile range, and the number of rows in the group
gap2007 %>%
  group_by(continent) %>%
  summarize(
    sd(lifeExp),
    IQR(lifeExp),
    n()
  )
## # A tibble: 5 × 4
##   continent `sd(lifeExp)` `IQR(lifeExp)` `n()`
##      <fctr>         <dbl>          <dbl> <int>
## 1    Africa     9.6307807       11.61025    52
## 2  Americas     4.4409476        4.63200    25
## 3      Asia     7.9637245       10.15200    33
## 4    Europe     2.9798127        4.78250    30
## 5   Oceania     0.7290271        0.51550     2
# Overlaid density curves of lifeExp, one fill color per continent;
# alpha = 0.3 keeps overlapping curves visible through each other
gap2007 %>%
  ggplot(aes(x = lifeExp, fill = continent)) +
  geom_density(alpha = 0.3)