Original Graph

library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(readr)
library(patchwork)
library(ggthemes)
library(scales)
## 
## Attaching package: 'scales'
## 
## The following object is masked from 'package:purrr':
## 
##     discard
## 
## The following object is masked from 'package:readr':
## 
##     col_factor
# Load the data used by every panel below; the file is read from the
# current working directory.
both_sexes <- readr::read_csv(file = "both_sexes.csv")
## New names:
## Rows: 17 Columns: 75
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," dbl
## (74): ...1, year, all_2534, HS_2534, SC_2534, BAp_2534, BAo_2534, GD_25... date
## (1): date
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`
# Education panel of the original chart. Each line plots 1 - <column> against
# year for one education group; the figure title presents these as marriage
# rates (assumes the *_2534 columns are proportions in [0, 1] -- TODO confirm).
# `linewidth` is used instead of `size`, which is deprecated for lines since
# ggplot2 3.4.0.
p1 <- ggplot(both_sexes) +
  geom_line(
    aes(x = year, y = 1 - HS_2534),
    color = "deeppink4", linewidth = 1.5
  ) +
  geom_line(
    aes(x = year, y = 1 - SC_2534),
    color = "deeppink3", linewidth = 1.5
  ) +
  geom_line(
    aes(x = year, y = 1 - BAp_2534),
    color = "deeppink2", linewidth = 1.5
  ) +
  # Fix the axis to 0-100% so both panels share the same vertical range.
  scale_y_continuous(limits = c(0, 1), labels = label_percent(scale = 100)) +
  # Direct labels in place of a legend.
  annotate("text", x = 1985, y = 0.40, label = "Some college") +
  annotate("text", x = 1968, y = 0.60, label = "College \n graduates") +
  annotate("text", x = 1975, y = 0.95, label = "High school or less") +
  # Leader lines running from each label towards its series.
  annotate("segment", x = 1968, xend = 1968, y = 0.68, yend = 0.77, colour = "black") +
  annotate("segment", x = 1985, xend = 1985, y = 0.43, yend = 0.75, colour = "black") +
  annotate("segment", x = 1975, xend = 1975, y = 0.87, yend = 0.92, colour = "black") +
  theme_fivethirtyeight()
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
# Race/ethnicity panel of the original chart. Each line plots 1 - <column>
# against year for one group; the figure title presents these as marriage
# rates (assumes the *_2534 columns are proportions in [0, 1] -- TODO confirm).
# `linewidth` is used instead of `size`, which is deprecated for lines since
# ggplot2 3.4.0.
p2 <- ggplot(both_sexes) +
  geom_line(
    aes(x = year, y = 1 - White_2534),
    color = "green4", linewidth = 1.5
  ) +
  geom_line(
    aes(x = year, y = 1 - Black_2534),
    color = "darkgreen", linewidth = 1.5
  ) +
  geom_line(
    aes(x = year, y = 1 - Hisp_2534),
    color = "cadetblue2", linewidth = 1.5
  ) +
  # Fix the axis to 0-100% so both panels share the same vertical range.
  scale_y_continuous(limits = c(0, 1), labels = label_percent(scale = 100)) +
  # Direct labels in place of a legend.
  annotate("text", x = 1974, y = 0.95, label = "Non-Hispanic white") +
  annotate("text", x = 1992, y = 0.80, label = "Hispanic") +
  annotate("text", x = 1995, y = 0.40, label = "Black") +
  # Leader lines running from each label towards its series.
  annotate("segment", x = 1990, xend = 1990, y = 0.70, yend = 0.76, colour = "black") + # Hispanic
  annotate("segment", x = 1994, xend = 1994, y = 0.43, yend = 0.50, colour = "black") + # Black
  annotate("segment", x = 1962, xend = 1962, y = 0.89, yend = 0.92, colour = "black") + # Non-Hispanic white
  theme_fivethirtyeight()

# Combine the two panels with patchwork and attach the figure-level
# title and subtitle.
(p1 + p2) +
  plot_annotation(
    subtitle = "Ages 25 to 34",
    title = "Marriage Rates by Education and Race"
  )

Our recreation

library(ggthemes)
# Recreated education panel. Four education groups are drawn, each plotting
# 1 - <column> against year, with colours assigned by name in
# scale_color_manual(). Partial transparency keeps overlapping lines visible.
# `linewidth` is used instead of `size`, which is deprecated for lines since
# ggplot2 3.4.0.
p1 <- ggplot(both_sexes) +
  geom_line(
    aes(x = year, y = 1 - HS_2534, color = "High School or Less"),
    linewidth = 1.5, alpha = 0.7
  ) +
  geom_line(
    aes(x = year, y = 1 - SC_2534, color = "Some College"),
    linewidth = 1.5, alpha = 0.7
  ) +
  geom_line(
    aes(x = year, y = 1 - BAo_2534, color = "Bachelor's Degree"),
    linewidth = 1.5, alpha = 0.7
  ) +
  geom_line(
    aes(x = year, y = 1 - GD_2534, color = "Graduate Degree"),
    linewidth = 1.5, alpha = 0.7
  ) +
  scale_color_manual(values = c(
    "High School or Less" = "#E69F00",
    "Some College" = "#56B4E9",
    "Bachelor's Degree" = "#009E73",
    "Graduate Degree" = "#F0E442"
  )) +
  # Fix the axis to 0-100% so both panels share the same vertical range.
  scale_y_continuous(limits = c(0, 1), labels = label_percent(scale = 100)) +
  # Direct labels: the legend is hidden below, so these are the only way to
  # identify each series and must name the four plotted groups.
  # NOTE(review): the 1975 leader line (y 0.87-0.92) sits where the original
  # chart labels the high-school series, so that label now reads "High school
  # or less"; the 1968 label, formerly "College graduates" (a combined series
  # that is not drawn here), now names the bachelor's series. Confirm both
  # against the rendered plot.
  annotate("text", x = 1985, y = 0.57, label = "Some college") +
  annotate("text", x = 1968, y = 0.40, label = "Bachelor's degree") +
  annotate("text", x = 1975, y = 0.95, label = "High school or less") +
  annotate("text", x = 2000, y = 0.25, label = "Graduate degree") +
  # Leader lines running from each label towards its series.
  annotate("segment", x = 1968, xend = 1968, y = 0.47, yend = 0.77, colour = "black") + # bachelor's
  annotate("segment", x = 1985, xend = 1985, y = 0.60, yend = 0.75, colour = "black") + # some college
  annotate("segment", x = 1975, xend = 1975, y = 0.87, yend = 0.92, colour = "black") + # high school
  annotate("segment", x = 2000, xend = 2000, y = 0.27, yend = 0.63, colour = "black") + # graduate
  theme_fivethirtyeight() +
  # The colour mapping would otherwise add a legend that duplicates the
  # direct labels.
  theme(legend.position = "none")


# Recreated race/ethnicity panel. Each line plots 1 - <column> against year,
# with colours assigned by name in scale_color_manual(). Partial transparency
# keeps overlapping lines visible.
# `linewidth` is used instead of `size`, which is deprecated for lines since
# ggplot2 3.4.0.
p2 <- ggplot(both_sexes) +
  geom_line(
    aes(x = year, y = 1 - White_2534, color = "Non-Hispanic white"),
    linewidth = 1.5, alpha = 0.7
  ) +
  geom_line(
    aes(x = year, y = 1 - Black_2534, color = "Black"),
    linewidth = 1.5, alpha = 0.7
  ) +
  geom_line(
    aes(x = year, y = 1 - Hisp_2534, color = "Hispanic"),
    linewidth = 1.5, alpha = 0.7
  ) +
  scale_color_manual(values = c(
    "Non-Hispanic white" = "#0072B2",
    "Black" = "#CC79A7",
    "Hispanic" = "#D55E00"
  )) +
  # Fix the axis to 0-100% so both panels share the same vertical range.
  scale_y_continuous(limits = c(0, 1), labels = label_percent(scale = 100)) +
  # Direct labels in place of the hidden legend.
  annotate("text", x = 1974, y = 0.95, label = "Non-Hispanic white") +
  annotate("text", x = 1992, y = 0.80, label = "Hispanic") +
  annotate("text", x = 1995, y = 0.40, label = "Black") +
  # Leader lines running from each label towards its series.
  annotate("segment", x = 1990, xend = 1990, y = 0.70, yend = 0.76, colour = "black") + # Hispanic
  annotate("segment", x = 1994, xend = 1994, y = 0.43, yend = 0.50, colour = "black") + # Black
  annotate("segment", x = 1962, xend = 1962, y = 0.89, yend = 0.92, colour = "black") + # Non-Hispanic white
  theme_fivethirtyeight() +
  # The colour mapping would otherwise add a legend that duplicates the
  # direct labels.
  theme(legend.position = "none")

# Combine the two panels with patchwork and attach the figure-level
# title and subtitle.
(p1 + p2) +
  plot_annotation(
    subtitle = "Ages 25 to 34",
    title = "Marriage Rates by Education and Race"
  )
## Warning: Removed 3 rows containing missing values or values outside the scale range
## (`geom_line()`).

Writeup

The original graph, sourced from Five Thirty Eight, shows marriage rates by education and race for people ages 25 to 34. This graph is very effective in many ways. The separation of the data by education and race into two separate graphs reduces clutter. Chapter 21 of DataViz covers multi-panel figures, explaining how subsetting data can make complex data more digestible. The use of direct labeling also reduces clutter and makes the graph more readable. Adding a legend to this figure would be an example of redundant coding, which is covered in chapter 20 of DataViz. We also felt that the graph’s title, subtitle, and axis labels were clear and readable.

Still, we feel there is some room for improvement in the design of this graph. In our class on accessibility, we discussed the importance of using colorblind-friendly color scales. The use of gradient colors, each of which is similar to the others, could fall short for colorblind readers. For categorical data, the Okabe-Ito color scale is a good option that is accessible for people with different varieties of colorblindness.

We also incorporated alpha in order to easily distinguish the lines from one another at places of overlap. This decision was based on the approach suggested in DataViz for handling overlapping points. Partial transparency is an effective way to make overlapping data visible and clear.

In the original graph, the graduate degree data is not shown separately: bachelor’s and graduate degrees are combined into “college graduates.” To give a more detailed picture of the data, we added the data for graduate degrees into the graph and separated out those with bachelor’s degrees only.

When initially reading the original graph on marriage rates by education, it can be difficult to quickly match each line to its label. To help, we repositioned the labels, so that the relative height of each label matched the relative height of each line at x = 0.