In today’s class we’ll use plotly to make your analysis interactive.
New functions and concepts:
ggplotlyplotlylibrary(tidyverse)
library(tidycensus)
options(tigris_use_cache = TRUE)
library(sf)
library(scales)
library(RColorBrewer)
library(viridis)
library(plotly)
# import Brooklyn census tracts geometry and 1 variable
raw_ancestry_shp <- get_acs(geography = "tract",
variables ="B04006_001",
state='NY',
county = 'Kings',
geometry = TRUE,
year = 2019)
# import ancestry data for Brooklyn census tracts
raw_ancestry <- get_acs(geography = "tract",
table="B04006",
state='NY',
county = 'Kings',
geometry = FALSE,
year = 2019)
# process ancestry table data
west_indian <- raw_ancestry %>%
pivot_wider(names_from = variable, values_from = c(estimate, moe)) %>%
# use rowSums() to add up all of your columns, across() allows you to select some columns
mutate(west_indian = rowSums(across(estimate_B04006_095:estimate_B04006_106))) %>%
# keep the west indian columns to check your sum
select(GEOID, NAME, west_indian, estimate_B04006_095:estimate_B04006_106) %>%
# now that I looked at the table, I can remove each individual west indian variable
select(GEOID, NAME, west_indian)
### import neighborhood shapefile for new york city
raw_nabes <- st_read("data/raw/nynta2020_21c/nynta2020.shp")
# select only the columns we want
nabes <- raw_nabes %>%
select(BoroName, CountyFIPS, NTA2020, NTAName)
#### join processed ancestry data to the shapefile
#### change the projection to 2263, the NYC projection of choice
#### then add the name of the neighborhood to the census tracts using a spatial join
ancestry_shp <- raw_ancestry_shp %>%
select(GEOID, estimate, moe, geometry) %>%
rename(total = estimate,
total_moe = moe) %>%
filter(total > 0) %>%
left_join(west_indian, by = "GEOID") %>%
mutate(pct_west_indian = round(west_indian/total, 3)) %>%
select(GEOID, NAME, total, west_indian, pct_west_indian, total_moe, geometry) %>%
st_transform(2263) %>% # change projection to NYC projection
st_join(nabes %>% select(NTAName, geometry), join = st_intersects) # spatial join
#### aggregate the ancestry census tract data to the neighborhood level
west_indian_nabe_stats <- st_drop_geometry(ancestry_shp) %>%
group_by(NTAName) %>%
summarise(`Neighborhood Est. Total Population` = sum(total),
`Neighborhood Est. Total West Indian Population` = sum(west_indian)) %>%
mutate(`Neighborhood Est. Percent West Indian Ancestry` = percent(`Neighborhood Est. Total West Indian Population`/`Neighborhood Est. Total Population`, accuracy = 1L))
#### join the neighborhood ancestry data to census tracts to compare the tract to the neighborhood
tract_west_indian_nabes <- ancestry_shp %>%
left_join(west_indian_nabe_stats, by = "NTAName")
### create static map
west_indian_bk_plot <- ggplot() +
geom_sf(data = tract_west_indian_nabes, mapping = aes(fill = pct_west_indian), color = "#ffffff", lwd = 0) +
theme_void() +
scale_fill_distiller(breaks=c(0, .2, .4, .6, .8, 1),
direction = 1,
na.value = "#fafafa",
# na.value = "transparent",
name="Percent West Indian Ancestry (%)",
labels=percent_format(accuracy = 1L)) +
labs(
title = "Brooklyn, West Indian Ancestry by Census Tract",
caption = "Source: American Community Survey, 2015-19"
) +
geom_sf(data = nabes %>% filter(BoroName == "Brooklyn"),
color = "black", fill = NA, lwd = .5)
west_indian_bk_plot
ggplotly() function in plotly.### make it interactive
ggplotly(west_indian_bk_plot)
west_indian_bk_plotly <- ggplot() +
geom_sf(data = tract_west_indian_nabes, aes(fill = pct_west_indian,
text = paste0("Neighborhood: ", NTAName,
"<br>Tract Percent West Indian : ", scales::percent(pct_west_indian, accuracy=1L),
"<br>Neighborhood Percent West Indian : ", `Neighborhood Est. Percent West Indian Ancestry`)),
color = "#ffffff", lwd = 0) +
theme_void() +
scale_fill_distiller(breaks=c(0, .2, .4, .6, .8, 1),
direction = 1,
na.value = "#fafafa",
name="Percent West Indian Ancestry (%)",
labels=percent_format(accuracy = 1L)) +
labs(
title = "Brooklyn, West Indian Ancestry",
caption = "Source: American Community Survey, 2015-19"
) +
geom_sf(data = nabes %>% filter(BoroName == "Brooklyn"),
color = "black", fill = NA, lwd = .5)
## Warning: Ignoring unknown aesthetics: text
ggplotly(west_indian_bk_plotly, tooltip = "text")
ggplotly is a really easy way to make your plots and maps interactive to explore.You can make any ggplot chart interactive too.
# import Princeton Gerrymandering Project's benchmarks for:
# BIPOC representation
congress_mvap <- read_csv("data/raw/dra/congress_proposed_mvap_pcts.csv") %>%
mutate(rank = rank(proposed_mvap))
# Black representation
congress_bvap <- read_csv("data/raw/dra/congress_proposed_bvap_pcts.csv") %>%
mutate(rank = rank(proposed_bvap))
# Partisan lean
congress_dem <- read_csv("data/raw/dra/congress_proposed_dem_pcts.csv") %>%
mutate(rank = rank(p50))
# import GA class congressional maps
raw_1 <- read_csv("data/raw/dra/class/district-statistics_1.csv")
## Warning: 16 parsing failures.
## row col expected actual file
## 1 -- 14 columns 15 columns 'data/raw/dra/class/district-statistics_1.csv'
## 2 -- 14 columns 15 columns 'data/raw/dra/class/district-statistics_1.csv'
## 3 -- 14 columns 15 columns 'data/raw/dra/class/district-statistics_1.csv'
## 4 -- 14 columns 15 columns 'data/raw/dra/class/district-statistics_1.csv'
## 5 -- 14 columns 15 columns 'data/raw/dra/class/district-statistics_1.csv'
## ... ... .......... .......... ..............................................
## See problems(...) for more details.
raw_2 <- read_csv("data/raw/dra/class/district-statistics_2.csv")
## Warning: 16 parsing failures.
## row col expected actual file
## 1 -- 14 columns 15 columns 'data/raw/dra/class/district-statistics_2.csv'
## 2 -- 14 columns 15 columns 'data/raw/dra/class/district-statistics_2.csv'
## 3 -- 14 columns 15 columns 'data/raw/dra/class/district-statistics_2.csv'
## 4 -- 14 columns 15 columns 'data/raw/dra/class/district-statistics_2.csv'
## 5 -- 14 columns 15 columns 'data/raw/dra/class/district-statistics_2.csv'
## ... ... .......... .......... ..............................................
## See problems(...) for more details.
raw_3 <- read_csv("data/raw/dra/class/district-statistics_3.csv")
## Warning: 16 parsing failures.
## row col expected actual file
## 1 -- 14 columns 15 columns 'data/raw/dra/class/district-statistics_3.csv'
## 2 -- 14 columns 15 columns 'data/raw/dra/class/district-statistics_3.csv'
## 3 -- 14 columns 15 columns 'data/raw/dra/class/district-statistics_3.csv'
## 4 -- 14 columns 15 columns 'data/raw/dra/class/district-statistics_3.csv'
## 5 -- 14 columns 15 columns 'data/raw/dra/class/district-statistics_3.csv'
## ... ... .......... .......... ..............................................
## See problems(...) for more details.
raw_4 <- read_csv("data/raw/dra/class/district-statistics_4.csv")
## Warning: 16 parsing failures.
## row col expected actual file
## 1 -- 14 columns 15 columns 'data/raw/dra/class/district-statistics_4.csv'
## 2 -- 14 columns 15 columns 'data/raw/dra/class/district-statistics_4.csv'
## 3 -- 14 columns 15 columns 'data/raw/dra/class/district-statistics_4.csv'
## 4 -- 14 columns 15 columns 'data/raw/dra/class/district-statistics_4.csv'
## 5 -- 14 columns 15 columns 'data/raw/dra/class/district-statistics_4.csv'
## ... ... .......... .......... ..............................................
## See problems(...) for more details.
raw_5 <- read_csv("data/raw/dra/class/district-statistics_5.csv")
## Warning: 16 parsing failures.
## row col expected actual file
## 1 -- 14 columns 15 columns 'data/raw/dra/class/district-statistics_5.csv'
## 2 -- 14 columns 15 columns 'data/raw/dra/class/district-statistics_5.csv'
## 3 -- 14 columns 15 columns 'data/raw/dra/class/district-statistics_5.csv'
## 4 -- 14 columns 15 columns 'data/raw/dra/class/district-statistics_5.csv'
## 5 -- 14 columns 15 columns 'data/raw/dra/class/district-statistics_5.csv'
## ... ... .......... .......... ..............................................
## See problems(...) for more details.
# import Congressional map proposed by the Redistricting committee
raw_proposed <- read_csv("data/raw/dra/congress_proposed_all_2020data.csv")
########################################
### process each map to a data frame ###
########################################
rank_1 <- raw_1 %>%
filter(ID != "Un",
ID != "Summary" ) %>%
mutate(rank_dem = rank(Dem),
rank_black = rank(Black),
rank_bipoc = rank(Minority),
dot_size = 1) %>%
select(ID, Black, Minority, Dem, rank_dem:dot_size) %>%
mutate(run = "class 1")
rank_2 <- raw_2 %>%
filter(ID != "Un",
ID != "Summary" ) %>%
mutate(rank_dem = rank(Dem),
rank_black = rank(Black),
rank_bipoc = rank(Minority),
dot_size = 1) %>%
select(ID, Black, Minority, Dem, rank_dem:dot_size) %>%
mutate(run = "class 2")
rank_3 <- raw_3 %>%
filter(ID != "Un",
ID != "Summary" ) %>%
mutate(rank_dem = rank(Dem),
rank_black = rank(Black),
rank_bipoc = rank(Minority),
dot_size = 1) %>%
select(ID, Black, Minority, Dem, rank_dem:dot_size) %>%
mutate(run = "class 3")
rank_4 <- raw_4 %>%
filter(ID != "Un",
ID != "Summary" ) %>%
mutate(rank_dem = rank(Dem),
rank_black = rank(Black),
rank_bipoc = rank(Minority),
dot_size = 1) %>%
select(ID, Black, Minority, Dem, rank_dem:dot_size) %>%
mutate(run = "class 4")
rank_5 <- raw_5 %>%
filter(ID != "Un",
ID != "Summary" ) %>%
mutate(rank_dem = rank(Dem),
rank_black = rank(Black),
rank_bipoc = rank(Minority),
dot_size = 1) %>%
select(ID, Black, Minority, Dem, rank_dem:dot_size) %>%
mutate(run = "class 5")
rank_proposed <- raw_proposed %>%
rename(ID = district,
Dem = partisan,
Black = pct_bvp,
Minority = pct_bp_) %>%
mutate(rank_dem = rank(Dem),
rank_black = rank(Black),
rank_bipoc = rank(Minority),
dot_size = 1.5) %>%
select(ID, Black, Minority, Dem, rank_dem:dot_size) %>%
mutate(run = "Legislature proposed")
## create data frame of all competitive maps
comparison <- rank_1 %>%
rbind(rank_2) %>%
rbind(rank_3) %>%
rbind(rank_4) %>%
rbind(rank_5) %>%
rbind(rank_proposed)
##################################################
### create charts to explore minority representation ###
##################################################
minority_comparison <- ggplot() +
expand_limits(x=c(0,14), y=c(0, 1)) +
geom_rect(data=congress_mvap, mapping=aes(xmin=min(rank),
xmax=max(rank), ymin=0.45, ymax=0.55),
fill = "#e0e0d4", color=NA, alpha=0.25) +
geom_ribbon(data = congress_mvap, aes(x = rank, ymin=p1,ymax=p25), fill="#ffd65c", alpha=0.5) +
geom_ribbon(data = congress_mvap, aes(x = rank, ymin=p25,ymax=p75), fill="#a87e23", alpha=0.5) +
geom_ribbon(data = congress_mvap, aes(x = rank, ymin=p75,ymax=p99), fill="#ffd65c", alpha=0.5) +
geom_point(data = comparison, aes(x=rank_bipoc,
y=Minority,
color = run)) +
scale_y_continuous(labels = function(x) paste0(x*100, "%")) +
xlab("Congressional Districts, sorted lowest to highest BIPOC residents") +
labs(color='Map') +
theme(
panel.background = element_rect(fill = "transparent"), # bg of the panel
plot.background = element_rect(fill = "transparent", color = NA), # bg of the plot
legend.background = element_rect(fill = "transparent"),
panel.grid.major.x = element_blank(),
panel.grid.major.y = element_line(colour = "#e9e9e9"),
panel.grid.minor = element_blank(),
axis.text.x=element_blank(),
axis.title.y = element_blank(),
axis.ticks = element_blank())
minority_comparison
##################################################
### create charts to explore minority representation ###
##################################################
minority_comparison <- ggplot() +
expand_limits(x=c(0,14), y=c(0, 1)) +
geom_rect(data=congress_mvap, mapping=aes(xmin=min(rank),
xmax=max(rank), ymin=0.45, ymax=0.55),
fill = "#e0e0d4", color=NA, alpha=0.25) +
geom_ribbon(data = congress_mvap, aes(x = rank, ymin=p1,ymax=p25), fill="#ffd65c", alpha=0.5) +
geom_ribbon(data = congress_mvap, aes(x = rank, ymin=p25,ymax=p75), fill="#a87e23", alpha=0.5) +
geom_ribbon(data = congress_mvap, aes(x = rank, ymin=p75,ymax=p99), fill="#ffd65c", alpha=0.5) +
geom_point(data = comparison, aes(x=rank_bipoc,
y=Minority,
color = run,
text = paste("Run: ", run,
"<br>Percent BIPOC: ", scales::percent(Minority, scale = 100, accuracy = 1),
"<br>Percent Black: ", scales::percent(Black, scale = 100, accuracy = 1),
"<br>Partisan, Percent Democrat: ", scales::percent(Dem, scale = 100, accuracy = 1)))) +
scale_y_continuous(labels = function(x) paste0(x*100, "%")) +
xlab("Congressional Districts, sorted lowest to highest BIPOC residents") +
labs(color='Map') +
theme(
panel.background = element_rect(fill = "transparent"), # bg of the panel
plot.background = element_rect(fill = "transparent", color = NA), # bg of the plot
legend.background = element_rect(fill = "transparent"),
panel.grid.major.x = element_blank(),
panel.grid.major.y = element_line(colour = "#e9e9e9"),
panel.grid.minor = element_blank(),
axis.text.x=element_blank(),
axis.title.y = element_blank(),
axis.ticks = element_blank())
## Warning: Ignoring unknown aesthetics: text
ggplotly(minority_comparison, tooltip = "text")
minority_comparison <- ggplot() +
expand_limits(x=c(0,14), y=c(0, 1)) +
geom_rect(data=congress_mvap, mapping=aes(xmin=min(rank),
xmax=max(rank), ymin=0.45, ymax=0.55),
fill = "#e0e0d4", color=NA, alpha=0.25) +
geom_ribbon(data = congress_mvap, aes(x = rank, ymin=p1,ymax=p25), fill="#ffd65c", alpha=0.5) +
geom_ribbon(data = congress_mvap, aes(x = rank, ymin=p25,ymax=p75), fill="#a87e23", alpha=0.5) +
geom_ribbon(data = congress_mvap, aes(x = rank, ymin=p75,ymax=p99), fill="#ffd65c", alpha=0.5) +
geom_point(data = comparison, aes(x=rank_bipoc,
y=Minority,
color = run,
size = dot_size,
text = paste("Run: ", run,
"<br>Percent BIPOC: ", scales::percent(Minority, scale = 100, accuracy = 1),
"<br>Percent Black: ", scales::percent(Black, scale = 100, accuracy = 1),
"<br>Partisan, Percent Democrat: ", scales::percent(Dem, scale = 100, accuracy = 1)))) +
scale_y_continuous(labels = function(x) paste0(x*100, "%")) +
xlab("Congressional Districts, sorted lowest to highest BIPOC residents") +
labs(color='Map') +
guides(size = "none") + # remove size legend
theme(
panel.background = element_rect(fill = "transparent"), # bg of the panel
plot.background = element_rect(fill = "transparent", color = NA), # bg of the plot
legend.background = element_rect(fill = "transparent"),
panel.grid.major.x = element_blank(),
panel.grid.major.y = element_line(colour = "#e9e9e9"),
panel.grid.minor = element_blank(),
axis.text.x=element_blank(),
axis.title.y = element_blank(),
axis.ticks = element_blank())
## Warning: Ignoring unknown aesthetics: text
ggplotly(minority_comparison, tooltip = "text")
You will begin this in class, and complete it for homework.
Use tidycensus and plotly to create an interactive scatterplot of every city in the country with population > 50,000. The scatterplot should compare per-capita income to the proportion of people over 25 with at least a bachelor’s degree. Hint: cities are called “places” in the census.
In a word document or on canvas that:
Upload your script to the assignment in canvas.