In today’s class we’ll use plotly to make your analysis interactive.

New functions and concepts:




Class preparations




library(tidyverse)
library(tidycensus)
options(tigris_use_cache = TRUE)
library(sf)
library(scales)
library(RColorBrewer)
library(viridis)
library(plotly)

# import Brooklyn census tracts geometry and 1 variable
raw_ancestry_shp <- get_acs(geography = "tract", 
                            variables ="B04006_001", 
                            state='NY',
                            county = 'Kings',
                            geometry = TRUE, 
                            year = 2019)

# import ancestry data for Brooklyn census tracts
raw_ancestry <- get_acs(geography = "tract", 
                        table="B04006", 
                        state='NY',
                        county = 'Kings',
                        geometry = FALSE, 
                        year = 2019)

# process ancestry table data
west_indian <- raw_ancestry %>% 
  pivot_wider(names_from = variable, values_from = c(estimate, moe)) %>% 
  # use rowSums() to add up all of your columns, across() allows you to select some columns
  mutate(west_indian = rowSums(across(estimate_B04006_095:estimate_B04006_106))) %>% 
  # keep the west indian columns to check your sum
  select(GEOID, NAME, west_indian, estimate_B04006_095:estimate_B04006_106) %>% 
  # now that I looked at the table, I can remove each individual west indian variable
  select(GEOID, NAME, west_indian)


### import neighborhood shapefile for new york city
raw_nabes <- st_read("data/raw/nynta2020_21c/nynta2020.shp")

# select only the columns we want
nabes <- raw_nabes %>%
  select(BoroName, CountyFIPS, NTA2020, NTAName) 

#### join processed ancestry data to the shapefile
#### change the projection to 2263, the NYC projection of choice
#### then add the name of the neighborhood to the census tracts using a spatial join
ancestry_shp <- raw_ancestry_shp %>% 
  select(GEOID, estimate, moe, geometry) %>% 
  rename(total = estimate,
         total_moe = moe) %>% 
  filter(total > 0) %>% 
  left_join(west_indian, by = "GEOID") %>% 
  mutate(pct_west_indian = round(west_indian/total, 3)) %>% 
  select(GEOID, NAME, total, west_indian, pct_west_indian, total_moe, geometry) %>% 
  st_transform(2263) %>%  # change projection to NYC projection
  st_join(nabes %>% select(NTAName, geometry), join = st_intersects) # spatial join

#### aggregate the ancestry census tract data to the neighborhood level
west_indian_nabe_stats <- st_drop_geometry(ancestry_shp) %>% 
  group_by(NTAName) %>% 
  summarise(`Neighborhood Est. Total Population` = sum(total),
            `Neighborhood Est. Total West Indian Population` = sum(west_indian)) %>% 
  mutate(`Neighborhood Est. Percent West Indian Ancestry` = percent(`Neighborhood Est. Total West Indian Population`/`Neighborhood Est. Total Population`, accuracy = 1L))

#### join the neighborhood ancestry data to census tracts to compare the tract to the neighborhood
tract_west_indian_nabes <- ancestry_shp %>% 
  left_join(west_indian_nabe_stats, by = "NTAName") 


A map of the data is helpful.

### create static map
west_indian_bk_plot <- ggplot()  + 
  geom_sf(data = tract_west_indian_nabes, mapping = aes(fill = pct_west_indian), color = "#ffffff", lwd = 0) +
  theme_void() +
  scale_fill_distiller(breaks=c(0, .2, .4, .6, .8, 1),
                       direction = 1,
                       na.value = "#fafafa",
                       # na.value = "transparent",
                       name="Percent West Indian Ancestry (%)",
                       labels=percent_format(accuracy = 1L)) +
  labs(
    title = "Brooklyn, West Indian Ancestry by Census Tract",
    caption = "Source: American Community Survey, 2015-19"
  )  + 
  geom_sf(data = nabes %>% filter(BoroName == "Brooklyn"), 
          color = "black", fill = NA, lwd = .5)

west_indian_bk_plot


But it doesn’t show us any additional context. You can make any ggplot object interactive with the ggplotly() function in plotly.

### make it interactive
ggplotly(west_indian_bk_plot)


To make it more useful, you can explicitly define the tooltip.

west_indian_bk_plotly <- ggplot()  + 
  geom_sf(data = tract_west_indian_nabes, aes(fill = pct_west_indian,
                                   text = paste0("Neighborhood: ", NTAName,
                                   "<br>Tract Percent West Indian : ", scales::percent(pct_west_indian, accuracy=1L),
                                   "<br>Neighborhood Percent West Indian : ", `Neighborhood Est. Percent West Indian Ancestry`)), 
          color = "#ffffff", lwd = 0) +
  theme_void() +
  scale_fill_distiller(breaks=c(0, .2, .4, .6, .8, 1),
                       direction = 1,
                       na.value = "#fafafa",
                       name="Percent West Indian Ancestry (%)",
                       labels=percent_format(accuracy = 1L)) +
  labs(
    title = "Brooklyn, West Indian Ancestry",
    caption = "Source: American Community Survey, 2015-19"
  )  + 
  geom_sf(data = nabes %>% filter(BoroName == "Brooklyn"), 
          color = "black", fill = NA, lwd = .5)
## Warning: Ignoring unknown aesthetics: text
ggplotly(west_indian_bk_plotly, tooltip = "text")


ggplotly is a really easy way to make your plots and maps interactive to explore.





Interactive charts

You can make any ggplot chart interactive too.

Let’s explore the Georgia congressional maps from a few weeks, and compare them to the map proposed by the Legislature

# import Princeton Gerrymandering Project's benchmarks for:

# BIPOC representation
congress_mvap <- read_csv("data/raw/dra/congress_proposed_mvap_pcts.csv") %>% 
  mutate(rank = rank(proposed_mvap))

# Black representation
congress_bvap <- read_csv("data/raw/dra/congress_proposed_bvap_pcts.csv") %>% 
  mutate(rank = rank(proposed_bvap))

# Partisan lean
congress_dem <- read_csv("data/raw/dra/congress_proposed_dem_pcts.csv") %>% 
  mutate(rank = rank(p50))


# import GA class congressional maps
raw_1 <- read_csv("data/raw/dra/class/district-statistics_1.csv")
## Warning: 16 parsing failures.
## row col   expected     actual                                           file
##   1  -- 14 columns 15 columns 'data/raw/dra/class/district-statistics_1.csv'
##   2  -- 14 columns 15 columns 'data/raw/dra/class/district-statistics_1.csv'
##   3  -- 14 columns 15 columns 'data/raw/dra/class/district-statistics_1.csv'
##   4  -- 14 columns 15 columns 'data/raw/dra/class/district-statistics_1.csv'
##   5  -- 14 columns 15 columns 'data/raw/dra/class/district-statistics_1.csv'
## ... ... .......... .......... ..............................................
## See problems(...) for more details.
raw_2 <- read_csv("data/raw/dra/class/district-statistics_2.csv")
## Warning: 16 parsing failures.
## row col   expected     actual                                           file
##   1  -- 14 columns 15 columns 'data/raw/dra/class/district-statistics_2.csv'
##   2  -- 14 columns 15 columns 'data/raw/dra/class/district-statistics_2.csv'
##   3  -- 14 columns 15 columns 'data/raw/dra/class/district-statistics_2.csv'
##   4  -- 14 columns 15 columns 'data/raw/dra/class/district-statistics_2.csv'
##   5  -- 14 columns 15 columns 'data/raw/dra/class/district-statistics_2.csv'
## ... ... .......... .......... ..............................................
## See problems(...) for more details.
raw_3 <- read_csv("data/raw/dra/class/district-statistics_3.csv")
## Warning: 16 parsing failures.
## row col   expected     actual                                           file
##   1  -- 14 columns 15 columns 'data/raw/dra/class/district-statistics_3.csv'
##   2  -- 14 columns 15 columns 'data/raw/dra/class/district-statistics_3.csv'
##   3  -- 14 columns 15 columns 'data/raw/dra/class/district-statistics_3.csv'
##   4  -- 14 columns 15 columns 'data/raw/dra/class/district-statistics_3.csv'
##   5  -- 14 columns 15 columns 'data/raw/dra/class/district-statistics_3.csv'
## ... ... .......... .......... ..............................................
## See problems(...) for more details.
raw_4 <- read_csv("data/raw/dra/class/district-statistics_4.csv")
## Warning: 16 parsing failures.
## row col   expected     actual                                           file
##   1  -- 14 columns 15 columns 'data/raw/dra/class/district-statistics_4.csv'
##   2  -- 14 columns 15 columns 'data/raw/dra/class/district-statistics_4.csv'
##   3  -- 14 columns 15 columns 'data/raw/dra/class/district-statistics_4.csv'
##   4  -- 14 columns 15 columns 'data/raw/dra/class/district-statistics_4.csv'
##   5  -- 14 columns 15 columns 'data/raw/dra/class/district-statistics_4.csv'
## ... ... .......... .......... ..............................................
## See problems(...) for more details.
raw_5 <- read_csv("data/raw/dra/class/district-statistics_5.csv")
## Warning: 16 parsing failures.
## row col   expected     actual                                           file
##   1  -- 14 columns 15 columns 'data/raw/dra/class/district-statistics_5.csv'
##   2  -- 14 columns 15 columns 'data/raw/dra/class/district-statistics_5.csv'
##   3  -- 14 columns 15 columns 'data/raw/dra/class/district-statistics_5.csv'
##   4  -- 14 columns 15 columns 'data/raw/dra/class/district-statistics_5.csv'
##   5  -- 14 columns 15 columns 'data/raw/dra/class/district-statistics_5.csv'
## ... ... .......... .......... ..............................................
## See problems(...) for more details.
# import Congressional map proposed by the Redistricting committee
raw_proposed <- read_csv("data/raw/dra/congress_proposed_all_2020data.csv")

########################################
### process each map to a data frame ###
########################################
rank_1 <- raw_1 %>% 
  filter(ID != "Un",  
         ID != "Summary" ) %>% 
  mutate(rank_dem = rank(Dem),
         rank_black = rank(Black),
         rank_bipoc = rank(Minority),
         dot_size = 1) %>% 
  select(ID, Black, Minority, Dem, rank_dem:dot_size) %>% 
  mutate(run = "class 1")

rank_2 <- raw_2 %>% 
  filter(ID != "Un",  
         ID != "Summary" ) %>% 
  mutate(rank_dem = rank(Dem),
         rank_black = rank(Black),
         rank_bipoc = rank(Minority),
         dot_size = 1) %>% 
  select(ID, Black, Minority, Dem, rank_dem:dot_size) %>% 
  mutate(run = "class 2")

rank_3 <- raw_3 %>% 
  filter(ID != "Un",  
         ID != "Summary" ) %>% 
  mutate(rank_dem = rank(Dem),
         rank_black = rank(Black),
         rank_bipoc = rank(Minority),
         dot_size = 1) %>% 
  select(ID, Black, Minority, Dem, rank_dem:dot_size) %>% 
  mutate(run = "class 3")

rank_4 <- raw_4 %>% 
  filter(ID != "Un",  
         ID != "Summary" ) %>% 
  mutate(rank_dem = rank(Dem),
         rank_black = rank(Black),
         rank_bipoc = rank(Minority),
         dot_size = 1) %>% 
  select(ID, Black, Minority, Dem, rank_dem:dot_size) %>% 
  mutate(run = "class 4")

rank_5 <- raw_5 %>% 
  filter(ID != "Un",  
         ID != "Summary" ) %>% 
  mutate(rank_dem = rank(Dem),
         rank_black = rank(Black),
         rank_bipoc = rank(Minority),
         dot_size = 1) %>% 
  select(ID, Black, Minority, Dem, rank_dem:dot_size) %>% 
  mutate(run = "class 5")

rank_proposed <- raw_proposed %>% 
  rename(ID = district, 
         Dem = partisan,
         Black = pct_bvp,
         Minority = pct_bp_) %>% 
  mutate(rank_dem = rank(Dem),
         rank_black = rank(Black),
         rank_bipoc = rank(Minority),
         dot_size = 1.5) %>% 
  select(ID, Black, Minority, Dem, rank_dem:dot_size) %>% 
  mutate(run = "Legislature proposed")

  

## create data frame of all competitive maps
comparison <- rank_1 %>% 
  rbind(rank_2) %>% 
  rbind(rank_3) %>% 
  rbind(rank_4) %>% 
  rbind(rank_5) %>% 
  rbind(rank_proposed) 



##################################################
### create charts to explore minority representation ###
##################################################

minority_comparison <- ggplot() + 
  expand_limits(x=c(0,14), y=c(0, 1)) +
  geom_rect(data=congress_mvap, mapping=aes(xmin=min(rank), 
                                           xmax=max(rank), ymin=0.45, ymax=0.55), 
            fill = "#e0e0d4", color=NA, alpha=0.25) +
  geom_ribbon(data = congress_mvap, aes(x = rank, ymin=p1,ymax=p25), fill="#ffd65c", alpha=0.5) +
  geom_ribbon(data = congress_mvap, aes(x = rank, ymin=p25,ymax=p75), fill="#a87e23", alpha=0.5) +
  geom_ribbon(data = congress_mvap, aes(x = rank, ymin=p75,ymax=p99), fill="#ffd65c", alpha=0.5) +
  geom_point(data = comparison, aes(x=rank_bipoc, 
                                    y=Minority, 
                                    color = run))  +
  scale_y_continuous(labels = function(x) paste0(x*100, "%")) + 
  xlab("Congressional Districts, sorted lowest to highest BIPOC residents") +
  labs(color='Map') +
  theme(
    panel.background = element_rect(fill = "transparent"), # bg of the panel
    plot.background = element_rect(fill = "transparent", color = NA), # bg of the plot
    legend.background = element_rect(fill = "transparent"),
    panel.grid.major.x = element_blank(), 
    panel.grid.major.y = element_line(colour = "#e9e9e9"),
    panel.grid.minor = element_blank(),
    axis.text.x=element_blank(), 
    axis.title.y = element_blank(),
    axis.ticks = element_blank())

minority_comparison


Make it interactive

##################################################
### create charts to explore minority representation ###
##################################################

minority_comparison <- ggplot() + 
  expand_limits(x=c(0,14), y=c(0, 1)) +
  geom_rect(data=congress_mvap, mapping=aes(xmin=min(rank), 
                                           xmax=max(rank), ymin=0.45, ymax=0.55), 
            fill = "#e0e0d4", color=NA, alpha=0.25) +
  geom_ribbon(data = congress_mvap, aes(x = rank, ymin=p1,ymax=p25), fill="#ffd65c", alpha=0.5) +
  geom_ribbon(data = congress_mvap, aes(x = rank, ymin=p25,ymax=p75), fill="#a87e23", alpha=0.5) +
  geom_ribbon(data = congress_mvap, aes(x = rank, ymin=p75,ymax=p99), fill="#ffd65c", alpha=0.5) +
  geom_point(data = comparison, aes(x=rank_bipoc, 
                                    y=Minority, 
                                    color = run, 
                                    text = paste("Run: ", run,
                                                 "<br>Percent BIPOC: ", scales::percent(Minority, scale = 100, accuracy = 1),
                                                 "<br>Percent Black: ", scales::percent(Black, scale = 100, accuracy = 1),
                                                 "<br>Partisan, Percent Democrat: ", scales::percent(Dem, scale = 100, accuracy = 1))))  +
  scale_y_continuous(labels = function(x) paste0(x*100, "%")) + 
  xlab("Congressional Districts, sorted lowest to highest BIPOC residents") +
  labs(color='Map') +
  theme(
    panel.background = element_rect(fill = "transparent"), # bg of the panel
    plot.background = element_rect(fill = "transparent", color = NA), # bg of the plot
    legend.background = element_rect(fill = "transparent"),
    panel.grid.major.x = element_blank(), 
    panel.grid.major.y = element_line(colour = "#e9e9e9"),
    panel.grid.minor = element_blank(),
    axis.text.x=element_blank(), 
    axis.title.y = element_blank(),
    axis.ticks = element_blank())
## Warning: Ignoring unknown aesthetics: text
ggplotly(minority_comparison, tooltip = "text")


Make the proposed map stand out

minority_comparison <- ggplot() + 
  expand_limits(x=c(0,14), y=c(0, 1)) +
  geom_rect(data=congress_mvap, mapping=aes(xmin=min(rank), 
                                           xmax=max(rank), ymin=0.45, ymax=0.55), 
            fill = "#e0e0d4", color=NA, alpha=0.25) +
  geom_ribbon(data = congress_mvap, aes(x = rank, ymin=p1,ymax=p25), fill="#ffd65c", alpha=0.5) +
  geom_ribbon(data = congress_mvap, aes(x = rank, ymin=p25,ymax=p75), fill="#a87e23", alpha=0.5) +
  geom_ribbon(data = congress_mvap, aes(x = rank, ymin=p75,ymax=p99), fill="#ffd65c", alpha=0.5) +
  geom_point(data = comparison, aes(x=rank_bipoc, 
                                    y=Minority, 
                                    color = run, 
                                    size = dot_size,
                                    text = paste("Run: ", run,
                                                 "<br>Percent BIPOC: ", scales::percent(Minority, scale = 100, accuracy = 1),
                                                 "<br>Percent Black: ", scales::percent(Black, scale = 100, accuracy = 1),
                                                 "<br>Partisan, Percent Democrat: ", scales::percent(Dem, scale = 100, accuracy = 1))))  +
  scale_y_continuous(labels = function(x) paste0(x*100, "%")) + 
  xlab("Congressional Districts, sorted lowest to highest BIPOC residents") +
  labs(color='Map') +
  guides(size = "none") +  # remove size legend
  theme(
    panel.background = element_rect(fill = "transparent"), # bg of the panel
    plot.background = element_rect(fill = "transparent", color = NA), # bg of the plot
    legend.background = element_rect(fill = "transparent"),
    panel.grid.major.x = element_blank(), 
    panel.grid.major.y = element_line(colour = "#e9e9e9"),
    panel.grid.minor = element_blank(),
    axis.text.x=element_blank(), 
    axis.title.y = element_blank(),
    axis.ticks = element_blank())
## Warning: Ignoring unknown aesthetics: text
ggplotly(minority_comparison, tooltip = "text")



In-class exercise

You will begin this in class, and complete it for homework.

Use tidycensus and plotly to create an interactive scatterplot of every city in the country with population > 50,000. The scatterplot should compare per-capita income to the proportion of people over 25 with at least a bachelor’s degree. Hint: cities are called “places” in the census.

In a word document or on canvas that:

Upload your script to the assignment in canvas.