Import educational attainment and income data for every city in the United States.
Notice that I set the chunk option `message=FALSE` on the code chunk. That keeps the rendered document from displaying the package-loading messages that you usually see in your Console.
# Packages used below: tidycensus supplies get_acs(); tidyverse supplies the
# dplyr/tidyr verbs and ggplot2; scales supplies the axis/legend label
# formatters (percent_format, dollar_format, comma).
library(tidycensus)
library(tidyverse)
library(scales)
# A large scipen penalty makes R print numbers in fixed notation rather than
# scientific notation (e.g. 100000 instead of 1e+05).
options(scipen = 999)
##### Population data #####
# Download place-level ACS estimates for 2020 via tidycensus. With
# output = "wide" each requested variable becomes its own column, and the
# estimate columns carry an "E" suffix (popE, totalE, bachelorsE, ...).
raw_city_data <- get_acs(
  geography = "place",
  variables = c(
    pop = "B01003_001",               # total population
    total = "B15003_001",             # denominator of the attainment table
    bachelors = "B15003_022",
    masters = "B15003_023",
    profdegree = "B15003_024",
    doctorate = "B15003_025",
    per_capita_income = "B19301_001"
  ),
  year = 2020,
  output = "wide"
)

# Keep places with more than 50,000 residents, compute the share of the
# attainment universe holding at least a bachelor's degree, and split NAME
# ("<place>, <state>") into separate city and state columns.
city_data <- raw_city_data |>
  filter(popE > 50000) |>
  mutate(
    pct_at_least_bachelorsE =
      (bachelorsE + mastersE + profdegreeE + doctorateE) / totalE
  ) |>
  # Split on the LAST comma and swallow the whitespace after it. Splitting on
  # a bare "," left every state with a leading space (" Alabama") and would
  # cut a place name that itself contains a comma in the wrong position.
  separate(NAME, c("city", "state"), sep = ",\\s*(?=[^,]*$)")
Display the scatterplot. I’m removing the legends so that the plot doesn’t list every state:
#### Create scatterplot ####
# One point per city: educational attainment on x, per-capita income on y,
# point area scaled by population and colour keyed to state.
city_data |>
  ggplot(
    aes(
      x = pct_at_least_bachelorsE,
      y = per_capita_incomeE,
      size = popE,
      color = state
    )
  ) +
  geom_point(alpha = 0.75) +
  labs(
    title = "Educational Attainment and Per Capita Income",
    x = "Proportion of Adults with at least a Bachelor's Degree",
    y = "Per-capita Income",
    size = "Population",
    color = "State",
    caption = "Sources: ACS, 5-yr 2016-20"
  ) +
  # Axis tick labels: whole percents on x, whole dollars on y.
  scale_x_continuous(labels = percent_format(accuracy = 1)) +
  scale_y_continuous(labels = dollar_format(accuracy = 1)) +
  # Point area proportional to population; the comma labels would apply to
  # the size legend, which is suppressed by guides() below.
  scale_size_area(max_size = 10, labels = comma) +
  # Drop both legends; the colour legend would otherwise list every state.
  guides(
    color = "none",
    size = "none"
  ) +
  theme_bw()