Import educational attainment and income data for every city in the United States.

Notice that I set the chunk option message=FALSE in the code chunk header. That keeps the knitted document from displaying the package-loading messages that you usually see in your Console.

library(tidycensus)
library(tidyverse)
library(scales)
# A large scipen penalty makes R prefer fixed notation (50000) over scientific
# notation (5e+04) when printing numbers.
options(scipen = 999)

##### Population data #####
# Download ACS estimates for every Census "place" for the requested year.
# With output = "wide" each place is one row, and each requested variable's
# estimate lands in a column named <label>E (popE, totalE, bachelorsE, ...).
raw_city_data <- get_acs(
  geography = "place",
  variables = c(
    pop = "B01003_001",
    total = "B15003_001", # denominator for the education share below
    bachelors = "B15003_022",
    masters = "B15003_023",
    profdegree = "B15003_024",
    doctorate = "B15003_025",
    per_capita_income = "B19301_001"
  ),
  year = 2020,
  # "acs5" is already get_acs()'s default; spelled out so the code visibly
  # matches the "5-yr 2016-20" caption used on the plot.
  survey = "acs5",
  output = "wide"
)

# Keep only places with more than 50,000 residents, compute the share of the
# education universe holding a bachelor's degree or higher, and split the
# place label into separate city and state columns.
city_data <- raw_city_data |>
  filter(popE > 50000) |>
  mutate(
    pct_at_least_bachelorsE =
      (bachelorsE + mastersE + profdegreeE + doctorateE) / totalE
  ) |>
  # NAME is expected to look like "Springfield city, Illinois". Splitting on
  # ", " (comma AND space) keeps a stray leading space out of `state`, which a
  # bare "," separator would leave behind.
  separate(NAME, c("city", "state"), sep = ", ")

Display the scatterplot. I’m removing the legend so that it doesn’t list every state:

#### Create scatterplot ####
# One point per place: education share on x, per-capita income on y, point
# area scaled by population and colour keyed to state. Both legends are
# switched off so the plot does not have to list every state.
city_data |>
  ggplot(aes(
    x = pct_at_least_bachelorsE,
    y = per_capita_incomeE,
    size = popE,
    color = state
  )) +
  geom_point(alpha = 0.75) +
  # axis tick labels: whole percents and whole dollars
  scale_x_continuous(labels = label_percent(accuracy = 1)) +
  scale_y_continuous(labels = label_dollar(accuracy = 1)) +
  # point area proportional to population; comma-formatted legend labels
  scale_size_area(max_size = 10, labels = label_comma()) +
  labs(
    title = "Educational Attainment and Per Capita Income",
    x = "Proportion of Adults with at least a Bachelor's Degree",
    y = "Per-capita Income",
    caption = "Sources: ACS, 5-yr 2016-20",
    size = "Population",
    color = "State"
  ) +
  guides(
    size = "none",
    color = "none"
  ) +
  theme_bw()