First, business things:

homework style & future grade improvement opportunity
  • final grade extra credit: 10 points for a report; 5 more points for presenting the report in class
  • the report needs to be formatted specifically
  • good examples of a report: http://rpubs.com/gregmaghakian/438803
  • clean, minimal
  • all meat: every table/figure has a comment and a reason for inclusion; explain why you are including it
  • every word, every sentence should matter
  • other good example: https://rpubs.com/maypowerss/hw8
  • long intro, but very clean report overall
  • again, each table and figure makes sense and each inclusion matters for the overall narrative/report

ggplot2 lecture

Napoleon's 1812 winter retreat from Moscow

Replicate the War and Peace graphic (Minard's 1812 plot) using R

# Load HistData, which provides the Minard datasets used throughout, then
# take a working copy of the troop table and inspect its structure
library(HistData)
troops <- HistData::Minard.troops
str(object = troops)
## 'data.frame':    51 obs. of  5 variables:
##  $ long     : num  24 24.5 25.5 26 27 28 28.5 29 30 30.3 ...
##  $ lat      : num  54.9 55 54.5 54.7 54.8 54.9 55 55.1 55.2 55.3 ...
##  $ survivors: int  340000 340000 340000 320000 300000 280000 240000 210000 180000 175000 ...
##  $ direction: Factor w/ 2 levels "A","R": 1 1 1 1 1 1 1 1 1 1 ...
##  $ group    : int  1 1 1 1 1 1 1 1 1 1 ...
# Draw the route of the army: one path per troop group, positioned by
# longitude and latitude
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.4.4
ggplot(data = troops, mapping = aes(x = long, y = lat, group = group)) +
  geom_path()

# Map march direction to color and number of survivors to line thickness
ggplot(
  data = troops,
  mapping = aes(x = long, y = lat, group = group,
                color = direction, size = survivors)
) +
  geom_path()

# Slightly more polished: round off the ends of the path segments
ggplot(
  data = troops,
  mapping = aes(x = long, y = lat, group = group,
                color = direction, size = survivors)
) +
  geom_path(lineend = "round")

# Widen the range of line sizes to highlight how much the army shrinks
ggplot(
  data = troops,
  mapping = aes(x = long, y = lat, group = group,
                color = direction, size = survivors)
) +
  geom_path(lineend = "round") +
  scale_size(range = c(0.5, 15))

# The size range used here is only an example; if you don't like the
# thickness of the segments, change the range values to tweak the appearance.
# Change the colors to match the original figure, drop the axis titles, and
# hide both legends. The two colors are given unnamed, so they are assigned
# to the levels of `direction` in order ("A", then "R").
# Legends are switched off with "none": passing FALSE to guides() is
# deprecated in current ggplot2 releases.
ggplot(troops, aes(x = long, y = lat, group = group,
                   color = direction, size = survivors)) +
  geom_path(lineend = "round") +
  scale_size(range = c(0.5, 15)) +
  scale_colour_manual(values = c("#DFC17E", "#252523")) +
  labs(x = NULL, y = NULL) +
  guides(color = "none", size = "none")

#you can use r brewer to find hex codes, or use your favorite site
# Add city names: this information lives in a second dataset.
# The troop and city datasets both carry longitude and latitude columns,
# which is how we can anchor them in the same graph
cities <- HistData::Minard.cities
str(object = cities)
## 'data.frame':    20 obs. of  3 variables:
##  $ long: num  24 25.3 26.4 26.8 27.7 27.6 28.5 28.7 29.2 30.2 ...
##  $ lat : num  55 54.7 54.4 54.3 55.2 53.9 54.3 55.5 54.4 55.3 ...
##  $ city: Factor w/ 20 levels "Bobr","Chjat",..: 5 18 15 9 4 7 16 13 1 19 ...
# Add the city data to the graph. Each layer is given its own dataset, so
# the top-level ggplot() call is left empty: the troop paths come from
# `troops`, the points and name labels from `cities`.
# vjust = 1.5 shifts each label vertically away from its point.
# Legends are switched off with "none": passing FALSE to guides() is
# deprecated in current ggplot2 releases.
ggplot() +
  geom_path(data = troops,
            aes(x = long, y = lat, group = group,
                color = direction, size = survivors),
            lineend = "round") +
  geom_point(data = cities, aes(x = long, y = lat)) +
  geom_text(data = cities, aes(x = long, y = lat, label = city),
            vjust = 1.5) +
  scale_size(range = c(0.5, 15)) +
  scale_colour_manual(values = c("#DFC17E", "#252523")) +
  labs(x = NULL, y = NULL) +
  guides(color = "none", size = "none")

#introduce first extra package
#the whole point of this package is to make it prettier
library(ggrepel)
## Warning: package 'ggrepel' was built under R version 3.4.4
# Use ggrepel for further refinements: geom_text_repel() replaces geom_text()
# for the city names, and the points and labels get their own color and font
# family.
# Legends are switched off with "none": passing FALSE to guides() is
# deprecated in current ggplot2 releases.
ggplot() +
  geom_path(data = troops, aes(x = long, y = lat, group = group,
                               color = direction, size = survivors),
            lineend = "round") +
  geom_point(data = cities, aes(x = long, y = lat),
             color = "#DC5B44") +
  geom_text_repel(data = cities, aes(x = long, y = lat, label = city),
                  color = "#DC5B44", family = "sans") +
  scale_size(range = c(0.5, 15)) +
  scale_colour_manual(values = c("#DFC17E", "#252523")) +
  labs(x = NULL, y = NULL) +
  guides(color = "none", size = "none")

# Even better: same map as before, but stored in a variable so it can be
# reused later, and finished with theme_void() to strip the remaining axes
# and background.
# Legends are switched off with "none": passing FALSE to guides() is
# deprecated in current ggplot2 releases.
march.1812.plot.simple <- ggplot() +
  geom_path(data = troops, aes(x = long, y = lat, group = group,
                               color = direction, size = survivors),
            lineend = "round") +
  geom_point(data = cities, aes(x = long, y = lat),
             color = "#DC5B44") +
  geom_text_repel(data = cities, aes(x = long, y = lat, label = city),
                  color = "#DC5B44", family = "sans") +
  scale_size(range = c(0.5, 15)) +
  scale_colour_manual(values = c("#DFC17E", "#252523")) +
  labs(x = NULL, y = NULL) +
  guides(color = "none", size = "none") +
  theme_void()

# Print the stored plot
march.1812.plot.simple

# add third dataset of temperature and time
library(dplyr)
## Warning: package 'dplyr' was built under R version 3.4.4
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Build a printable label ("<temp>°, <date>") for each temperature reading.
# One reading has no recorded date (NA in the `date` factor); for it the
# label shows the temperature alone instead of pasting a literal "NA".
temps <- Minard.temp
temps.nice <- temps %>%
  mutate(nice.label = ifelse(is.na(date),
                             paste0(temp, "°"),
                             paste0(temp, "°, ", date)))
## Warning: package 'bindrcpp' was built under R version 3.4.4
str(temps.nice)
## 'data.frame':    9 obs. of  5 variables:
##  $ long      : num  37.6 36 33.2 32 29.2 28.5 27.2 26.7 25.3
##  $ temp      : int  0 0 -9 -21 -11 -20 -24 -30 -26
##  $ days      : int  6 6 16 5 10 4 3 5 1
##  $ date      : Factor w/ 8 levels "Dec01","Dec06",..: 7 8 4 5 NA 6 1 2 3
##  $ nice.label: chr  "0°, Oct18" "0°, Oct24" "-9°, Nov09" "-21°, Nov14" ...
# Put it all together: a temperature panel built from the labelled
# Minard.temp data.
#
# Its x axis must cover the same longitude range as the march map so the two
# panels line up when stacked. That range is read from the built map.
# Current ggplot2 stores it under layout$panel_params, while older releases
# used layout$panel_ranges. Reading only the old slot yields NULL on newer
# versions, which silently leaves the limits at their defaults, so check the
# new slot first and fall back to the old one.
march.layout <- ggplot_build(march.1812.plot.simple)$layout
march.panels <- march.layout$panel_params
if (is.null(march.panels)) {
  march.panels <- march.layout$panel_ranges
}
march.x.range <- march.panels[[1]]$x.range

temps.1812.plot <- ggplot(data = temps.nice, aes(x = long, y = temp)) +
  geom_line() +
  geom_label(aes(label = nice.label),
             family = "sans", size = 2.5) +
  labs(x = NULL, y = "° Celsius") +
  scale_x_continuous(limits = march.x.range) +
  scale_y_continuous(position = "right") +
  coord_cartesian(ylim = c(-35, 5)) +  # Add some space above/below
  theme_bw(base_family = "sans") +
  # Keep only the major horizontal grid lines; hide x-axis text, ticks, border
  theme(panel.grid.major.x = element_blank(),
        panel.grid.minor.x = element_blank(),
        panel.grid.minor.y = element_blank(),
        axis.text.x = element_blank(), axis.ticks = element_blank(),
        panel.border = element_blank())

# Print the temperature panel on its own
temps.1812.plot

#combine the two plots together in a single graphic using the *gridExtra* package
# rbind (here gtable_rbind) is how you bind two figures, one on top of the other
library(gridExtra)
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
## 
##     combine
# Stack the march map on top of the temperature panel
both.1812.plot.simple <- gtable_rbind(
  ggplotGrob(march.1812.plot.simple),
  ggplotGrob(temps.1812.plot)
)
# The stacking above is the essential step; everything below only tunes
# the appearance

# Find the layout rows that hold the two plot panels
panels <- both.1812.plot.simple$layout$t[
  grepl("panel", both.1812.plot.simple$layout$name)
]

# This plot doesn't use coord_equal (it's not a map), so the panel heights
# can be any relative numbers we like; here a 3:1 ratio
both.1812.plot.simple$heights[panels] <- unit(c(3, 1), units = "null")

# Draw the combined figure on a fresh page
grid::grid.newpage()
grid::grid.draw(both.1812.plot.simple)

CREDIT

The code for replicating Minard’s 1812 plot was adapted from Andrew Heiss: https://github.com/andrewheiss/fancy-minard

what else can we do to improve the graphic?