Harold Nelson
10/29/2018
library(tidyverse)
## ── Attaching packages ── tidyverse 1.2.1 ──
## ✔ ggplot2 3.0.0 ✔ purrr 0.2.5
## ✔ tibble 1.4.2 ✔ dplyr 0.7.6
## ✔ tidyr 0.8.1 ✔ stringr 1.3.1
## ✔ readr 1.1.1 ✔ forcats 0.3.0
## ── Conflicts ───── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(socviz)
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(ggmap)
##
## Attaching package: 'ggmap'
## The following object is masked from 'package:plotly':
##
## wind
library(maps)
##
## Attaching package: 'maps'
## The following object is masked from 'package:purrr':
##
## map
Look at the election dataframe from socviz.
glimpse(election)
## Observations: 51
## Variables: 22
## $ state <chr> "Alabama", "Alaska", "Arizona", "Arkansas", "Cali...
## $ st <chr> "AL", "AK", "AZ", "AR", "CA", "CO", "CT", "DE", "...
## $ fips <dbl> 1, 2, 4, 5, 6, 8, 9, 10, 11, 12, 13, 15, 16, 17, ...
## $ total_vote <dbl> 2123372, 318608, 2604657, 1130635, 14237884, 2780...
## $ vote_margin <dbl> 588708, 46933, 91234, 304378, 4269978, 136386, 22...
## $ winner <chr> "Trump", "Trump", "Trump", "Trump", "Clinton", "C...
## $ party <chr> "Republican", "Republican", "Republican", "Republ...
## $ pct_margin <dbl> 27.73, 14.73, 3.50, 26.92, 29.99, 4.91, 13.64, 11...
## $ r_points <dbl> 27.72, 14.73, 3.50, 26.92, -29.99, -4.91, -13.64,...
## $ d_points <dbl> -27.72, -14.73, -3.50, -26.92, 29.99, 4.91, 13.64...
## $ pct_clinton <dbl> 34.36, 36.55, 44.58, 33.65, 61.48, 48.16, 54.57, ...
## $ pct_trump <dbl> 62.08, 51.28, 48.08, 60.57, 31.49, 43.25, 40.93, ...
## $ pct_johnson <dbl> 2.09, 5.88, 4.08, 2.64, 3.36, 5.18, 2.96, 3.33, 1...
## $ pct_other <dbl> 1.46, 6.29, 3.25, 3.13, 3.66, 3.41, 1.55, 1.88, 3...
## $ clinton_vote <dbl> 729547, 116454, 1161167, 380494, 8753788, 1338870...
## $ trump_vote <dbl> 1318255, 163387, 1252401, 684872, 4483810, 120248...
## $ johnson_vote <dbl> 44467, 18725, 106327, 29829, 478500, 144121, 4867...
## $ other_vote <dbl> 31103, 20042, 84762, 35440, 521786, 94772, 25457,...
## $ ev_dem <dbl> 9, 3, 11, 6, 0, 0, 0, 0, 0, 29, 16, 0, 4, 0, 11, ...
## $ ev_rep <dbl> 0, 0, 0, 0, 55, 9, 7, 3, 3, 0, 0, 3, 0, 20, 0, 0,...
## $ ev_oth <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0...
## $ census <chr> "South", "West", "West", "South", "West", "West",...
Look at Healy’s graphic using facet_wrap and separate dotplots for each region. Fiddle with fig.height and fig.width to get something you’re pleased with.
party_colors <- c("#2E74C0", "#CB454A")
p0 <- ggplot(data = subset(election, st %nin% "DC"),
mapping = aes(x = r_points,
y = reorder(state, r_points),
color = party))
p1 <- p0 + geom_vline(xintercept = 0, color = "gray30") +
geom_point(size = 2)
p2 <- p1 + scale_color_manual(values = party_colors)
p3 <- p2 + scale_x_continuous(breaks = c(-30, -20, -10, 0, 10, 20, 30, 40),
labels = c("30\n (Clinton)", "20", "10", "0",
"10", "20", "30", "40\n(Trump)"))
p3 + facet_wrap(~ census, ncol=1, scales="free_y") +
guides(color=FALSE) + labs(x = "Point Margin", y = "") +
theme(axis.text=element_text(size=8))
The first ggplot object won’t show anything since it has no geom. What does p1 add?
p1
What does p2 add?
p2
What about p3?
p3
What happens if we remove the free y scaling?
p3 + facet_wrap(~ census, ncol=1) +
guides(color=FALSE) + labs(x = "Point Margin", y = "") +
theme(axis.text=element_text(size=8))
us_states <- map_data("state")
head(us_states)
## long lat group order region subregion
## 1 -87.46201 30.38968 1 1 alabama <NA>
## 2 -87.48493 30.37249 1 2 alabama <NA>
## 3 -87.52503 30.37249 1 3 alabama <NA>
## 4 -87.53076 30.33239 1 4 alabama <NA>
## 5 -87.57087 30.32665 1 5 alabama <NA>
## 6 -87.58806 30.32665 1 6 alabama <NA>
WA_map = filter(us_states,region == "washington")
DT::datatable(WA_map)
table(WA_map$subregion)
##
## lopez island main orcas island san juan island
## 17 448 19 17
## whidbey island
## 44
Note that the map is created by using the points to draw polygons in the designated order.
p <- ggplot(data = us_states,
mapping = aes(x = long, y = lat,
group = group))
p + geom_polygon(fill = "white", color = "black")
p <- ggplot(data = WA_map,
mapping = aes(x = long, y = lat,
group = group))
p + geom_polygon(fill = "white", color = "black")
Map fill to subregion instead of region. Note the tilt and apparent spherical surface.
p <- ggplot(data = WA_map,
mapping = aes(x = long, y = lat,
group = group, fill = subregion))
p + geom_polygon(color = "gray90", size = 0.1) +
coord_map(projection = "albers", lat0 = 39, lat1 = 45) +
guides(fill = FALSE)
Look at the merged dataframe with DT::datatable. Note the redundancy in the election data.
election$region <- tolower(election$state)
us_states_elec <- left_join(us_states, election)
## Joining, by = "region"
DT::datatable(us_states_elec)
## Warning in instance$preRenderHook(instance): It seems your data is too
## big for client-side DataTables. You may consider server-side processing:
## https://rstudio.github.io/DT/server.html
Using the joined file of map and election data we can map fill to any of the election variables. First we want Healy’s map theme, which is in the appendix. Try the graphic without theme_map().
theme_map <- function(base_size=9, base_family="") {
require(grid)
theme_bw(base_size=base_size, base_family=base_family) %+replace%
theme(axis.line=element_blank(),
axis.text=element_blank(),
axis.ticks=element_blank(),
axis.title=element_blank(),
panel.background=element_blank(),
panel.border=element_blank(),
panel.grid=element_blank(),
panel.spacing=unit(0, "lines"),
plot.background=element_blank(),
legend.justification = c(0,0),
legend.position = c(0,0)
)
}
p0 <- ggplot(data = us_states_elec,
mapping = aes(x = long, y = lat,
group = group, fill = party))
p1 <- p0 + geom_polygon(color = "gray90", size = 0.1) +
coord_map(projection = "albers", lat0 = 39, lat1 = 45)
p2 <- p1 + scale_fill_manual(values = party_colors) +
labs(title = "Election Results 2016", fill = NULL)
p2 + theme_map()
## Loading required package: grid
How would this look without theme_map?
p2
Healy shows several different versions of the election map with different options for color. The last one is probably the best.
p0 <- ggplot(data = subset(us_states_elec,
region %nin% "district of columbia"),
aes(x = long, y = lat, group = group, fill = d_points))
p1 <- p0 + geom_polygon(color = "gray90", size = 0.1) +
coord_map(projection = "albers", lat0 = 39, lat1 = 45)
p2 <- p1 + scale_fill_gradient2(low = "red",
mid = scales::muted("purple"),
high = "blue") +
labs(title = "Winning margins")
p2 + theme_map() + labs(fill = "Percent")
Note the impact of leaving DC in.
p0 <- ggplot(data = us_states_elec,
aes(x = long, y = lat, group = group, fill = d_points))
p1 <- p0 + geom_polygon(color = "gray90", size = 0.1) +
coord_map(projection = "albers", lat0 = 39, lat1 = 45)
p2 <- p1 + scale_fill_gradient2(low = "red",
mid = scales::muted("purple"),
high = "blue") +
labs(title = "Winning margins")
p2 + theme_map() + labs(fill = "Percent")
Use glimpse to look at county_map and county_data, which are in the socviz package.
glimpse(county_map)
## Observations: 191,382
## Variables: 7
## $ long <dbl> 1225889, 1235324, 1244873, 1244129, 1272010, 1276797, 12...
## $ lat <dbl> -1275020, -1274008, -1272331, -1267515, -1262889, -12955...
## $ order <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 1...
## $ hole <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, ...
## $ piece <fct> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,...
## $ group <fct> 0500000US01001.1, 0500000US01001.1, 0500000US01001.1, 05...
## $ id <chr> "01001", "01001", "01001", "01001", "01001", "01001", "0...
Note that there are almost 200,000 entries and 7 variables.
glimpse(county_data)
## Observations: 3,195
## Variables: 32
## $ id <chr> "0", "01000", "01001", "01003", "01005", "010...
## $ name <chr> NA, "1", "Autauga County", "Baldwin County", ...
## $ state <fct> NA, AL, AL, AL, AL, AL, AL, AL, AL, AL, AL, A...
## $ census_region <fct> NA, South, South, South, South, South, South,...
## $ pop_dens <fct> [ 50, 100), [ 50, 100), [ 50, 100), ...
## $ pop_dens4 <fct> [ 45, 118), [ 45, 118), [ 45, 118), [118,7...
## $ pop_dens6 <fct> [ 82, 215), [ 82, 215), [ 82, 215), [ 82, ...
## $ pct_black <fct> [10.0,15.0), [25.0,50.0), [15.0,25.0), [ 5.0,...
## $ pop <int> 318857056, 4849377, 55395, 200111, 26887, 225...
## $ female <dbl> 50.8, 51.5, 51.5, 51.2, 46.5, 46.0, 50.6, 45....
## $ white <dbl> 77.7, 69.8, 78.1, 87.3, 50.2, 76.3, 96.0, 27....
## $ black <dbl> 13.2, 26.6, 18.4, 9.5, 47.6, 22.1, 1.8, 69.9,...
## $ travel_time <dbl> 25.5, 24.2, 26.2, 25.9, 24.6, 27.6, 33.9, 26....
## $ land_area <dbl> 3531905.43, 50645.33, 594.44, 1589.78, 884.88...
## $ hh_income <int> 53046, 43253, 53682, 50221, 32911, 36447, 441...
## $ su_gun4 <fct> NA, NA, [11,54], [11,54], [ 5, 8), [11,54], [...
## $ su_gun6 <fct> NA, NA, [10,12), [10,12), [ 7, 8), [10,12), [...
## $ fips <dbl> 0, 1000, 1001, 1003, 1005, 1007, 1009, 1011, ...
## $ votes_dem_2016 <int> NA, NA, 5908, 18409, 4848, 1874, 2150, 3530, ...
## $ votes_gop_2016 <int> NA, NA, 18110, 72780, 5431, 6733, 22808, 1139...
## $ total_votes_2016 <int> NA, NA, 24661, 94090, 10390, 8748, 25384, 470...
## $ per_dem_2016 <dbl> NA, NA, 0.23956855, 0.19565310, 0.46660250, 0...
## $ per_gop_2016 <dbl> NA, NA, 0.7343579, 0.7735147, 0.5227141, 0.76...
## $ diff_2016 <int> NA, NA, 12202, 54371, 583, 4859, 20658, 2391,...
## $ per_dem_2012 <dbl> NA, NA, 0.2657577, 0.2156657, 0.5125229, 0.26...
## $ per_gop_2012 <dbl> NA, NA, 0.7263374, 0.7738975, 0.4833755, 0.73...
## $ diff_2012 <int> NA, NA, 11012, 47443, 334, 3931, 17780, 2808,...
## $ winner <chr> NA, NA, "Trump", "Trump", "Trump", "Trump", "...
## $ partywinner16 <chr> NA, NA, "Republican", "Republican", "Republic...
## $ winner12 <chr> NA, NA, "Romney", "Romney", "Obama", "Romney"...
## $ partywinner12 <chr> NA, NA, "Republican", "Republican", "Democrat...
## $ flipped <chr> NA, NA, "No", "No", "Yes", "No", "No", "No", ...
Now join these two dataframes, keeping just the subset of variables from county_data selected by Healy. Use glimpse to examine the result.
county_full <- left_join(county_map, county_data, by = "id")
glimpse(county_full)
## Observations: 191,382
## Variables: 38
## $ long <dbl> 1225889, 1235324, 1244873, 1244129, 1272010, ...
## $ lat <dbl> -1275020, -1274008, -1272331, -1267515, -1262...
## $ order <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14...
## $ hole <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FAL...
## $ piece <fct> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...
## $ group <fct> 0500000US01001.1, 0500000US01001.1, 0500000US...
## $ id <chr> "01001", "01001", "01001", "01001", "01001", ...
## $ name <chr> "Autauga County", "Autauga County", "Autauga ...
## $ state <fct> AL, AL, AL, AL, AL, AL, AL, AL, AL, AL, AL, A...
## $ census_region <fct> South, South, South, South, South, South, Sou...
## $ pop_dens <fct> [ 50, 100), [ 50, 100), [ 50, 100), ...
## $ pop_dens4 <fct> [ 45, 118), [ 45, 118), [ 45, 118), [ 45, ...
## $ pop_dens6 <fct> [ 82, 215), [ 82, 215), [ 82, 215), [ 82, ...
## $ pct_black <fct> [15.0,25.0), [15.0,25.0), [15.0,25.0), [15.0,...
## $ pop <int> 55395, 55395, 55395, 55395, 55395, 55395, 553...
## $ female <dbl> 51.5, 51.5, 51.5, 51.5, 51.5, 51.5, 51.5, 51....
## $ white <dbl> 78.1, 78.1, 78.1, 78.1, 78.1, 78.1, 78.1, 78....
## $ black <dbl> 18.4, 18.4, 18.4, 18.4, 18.4, 18.4, 18.4, 18....
## $ travel_time <dbl> 26.2, 26.2, 26.2, 26.2, 26.2, 26.2, 26.2, 26....
## $ land_area <dbl> 594.44, 594.44, 594.44, 594.44, 594.44, 594.4...
## $ hh_income <int> 53682, 53682, 53682, 53682, 53682, 53682, 536...
## $ su_gun4 <fct> [11,54], [11,54], [11,54], [11,54], [11,54], ...
## $ su_gun6 <fct> [10,12), [10,12), [10,12), [10,12), [10,12), ...
## $ fips <dbl> 1001, 1001, 1001, 1001, 1001, 1001, 1001, 100...
## $ votes_dem_2016 <int> 5908, 5908, 5908, 5908, 5908, 5908, 5908, 590...
## $ votes_gop_2016 <int> 18110, 18110, 18110, 18110, 18110, 18110, 181...
## $ total_votes_2016 <int> 24661, 24661, 24661, 24661, 24661, 24661, 246...
## $ per_dem_2016 <dbl> 0.2395685, 0.2395685, 0.2395685, 0.2395685, 0...
## $ per_gop_2016 <dbl> 0.7343579, 0.7343579, 0.7343579, 0.7343579, 0...
## $ diff_2016 <int> 12202, 12202, 12202, 12202, 12202, 12202, 122...
## $ per_dem_2012 <dbl> 0.2657577, 0.2657577, 0.2657577, 0.2657577, 0...
## $ per_gop_2012 <dbl> 0.7263374, 0.7263374, 0.7263374, 0.7263374, 0...
## $ diff_2012 <int> 11012, 11012, 11012, 11012, 11012, 11012, 110...
## $ winner <chr> "Trump", "Trump", "Trump", "Trump", "Trump", ...
## $ partywinner16 <chr> "Republican", "Republican", "Republican", "Re...
## $ winner12 <chr> "Romney", "Romney", "Romney", "Romney", "Romn...
## $ partywinner12 <chr> "Republican", "Republican", "Republican", "Re...
## $ flipped <chr> "No", "No", "No", "No", "No", "No", "No", "No...
Note that we have all of the records from county_map and each one of these records has been augmented by 31 variables from the county_data dataframe. Every point used to draw the map of a county carries all of the data for the entire county!
Let’s create the choropleth example from Healy.
p <- ggplot(data = county_full,
mapping = aes(x = long, y = lat,
fill = pop_dens,
group = group))
p1 <- p + geom_polygon(color = "gray90", size = 0.05) + coord_equal()
p2 <- p1 + scale_fill_brewer(palette="Blues",
labels = c("0-10", "10-50", "50-100", "100-500",
"500-1,000", "1,000-5,000", ">5,000"))
p2 + labs(fill = "Population per\nsquare mile") +
theme_map() +
guides(fill = guide_legend(nrow = 1)) +
theme(legend.position = "bottom")
Create WA_full and redo the map with it.
WA_full = filter(county_full,state == "WA")
glimpse(WA_full)
## Observations: 3,599
## Variables: 38
## $ long <dbl> -1372940, -1437163, -1458316, -1453530, -1438...
## $ lat <dbl> 350122.3, 365353.5, 370534.7, 389522.1, 38574...
## $ order <int> 176820, 176821, 176822, 176823, 176824, 17682...
## $ hole <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FAL...
## $ piece <fct> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...
## $ group <fct> 0500000US53001.1, 0500000US53001.1, 0500000US...
## $ id <chr> "53001", "53001", "53001", "53001", "53001", ...
## $ name <chr> "Adams County", "Adams County", "Adams County...
## $ state <fct> WA, WA, WA, WA, WA, WA, WA, WA, WA, WA, WA, W...
## $ census_region <fct> West, West, West, West, West, West, West, Wes...
## $ pop_dens <fct> [ 0, 10), [ 0, 10), [ 0, 10), ...
## $ pop_dens4 <fct> [ 0, 17), [ 0, 17), [ 0, 17), [ 0, ...
## $ pop_dens6 <fct> [ 9, 25), [ 9, 25), [ 9, 25), [ 9, ...
## $ pct_black <fct> [ 0.0, 2.0), [ 0.0, 2.0), [ 0.0, 2.0), [ 0.0,...
## $ pop <int> 19179, 19179, 19179, 19179, 19179, 19179, 191...
## $ female <dbl> 49.2, 49.2, 49.2, 49.2, 49.2, 49.2, 49.2, 49....
## $ white <dbl> 91, 91, 91, 91, 91, 91, 91, 91, 91, 91, 91, 9...
## $ black <dbl> 1.3, 1.3, 1.3, 1.3, 1.3, 1.3, 1.3, 1.3, 1.3, ...
## $ travel_time <dbl> 17.3, 17.3, 17.3, 17.3, 17.3, 17.3, 17.3, 17....
## $ land_area <dbl> 1924.98, 1924.98, 1924.98, 1924.98, 1924.98, ...
## $ hh_income <int> 43926, 43926, 43926, 43926, 43926, 43926, 439...
## $ su_gun4 <fct> [ 5, 8), [ 5, 8), [ 5, 8), [ 5, 8), [ 5, 8), ...
## $ su_gun6 <fct> [ 4, 7), [ 4, 7), [ 4, 7), [ 4, 7), [ 4, 7), ...
## $ fips <dbl> 53001, 53001, 53001, 53001, 53001, 53001, 530...
## $ votes_dem_2016 <int> 1118, 1118, 1118, 1118, 1118, 1118, 1118, 111...
## $ votes_gop_2016 <int> 2740, 2740, 2740, 2740, 2740, 2740, 2740, 274...
## $ total_votes_2016 <int> 4081, 4081, 4081, 4081, 4081, 4081, 4081, 408...
## $ per_dem_2016 <dbl> 0.2739525, 0.2739525, 0.2739525, 0.2739525, 0...
## $ per_gop_2016 <dbl> 0.6714041, 0.6714041, 0.6714041, 0.6714041, 0...
## $ diff_2016 <int> 1622, 1622, 1622, 1622, 1622, 1622, 1622, 162...
## $ per_dem_2012 <dbl> 0.3220735, 0.3220735, 0.3220735, 0.3220735, 0...
## $ per_gop_2012 <dbl> 0.6609305, 0.6609305, 0.6609305, 0.6609305, 0...
## $ diff_2012 <int> 1595, 1595, 1595, 1595, 1595, 1595, 1595, 159...
## $ winner <chr> "Trump", "Trump", "Trump", "Trump", "Trump", ...
## $ partywinner16 <chr> "Republican", "Republican", "Republican", "Re...
## $ winner12 <chr> "Romney", "Romney", "Romney", "Romney", "Romn...
## $ partywinner12 <chr> "Republican", "Republican", "Republican", "Re...
## $ flipped <chr> "No", "No", "No", "No", "No", "No", "No", "No...
WA_full = filter(county_full,state == "WA")
p <- ggplot(data = WA_full,
mapping = aes(x = long, y = lat,
fill = pop_dens,
group = group))
p1 <- p + geom_polygon(color = "gray90", size = 0.05) + coord_equal()
p2 <- p1 + scale_fill_brewer(palette="Blues",
labels = c("0-10", "10-50", "50-100", "100-500",
"500-1,000", "1,000-5,000", ">5,000"))
p2 + labs(fill = "Population per\nsquare mile") +
theme_map() +
guides(fill = guide_legend(nrow = 1)) +
theme(legend.position = "bottom")
Do a choropleth for some other variable and Washington’s counties. Do this for another state you’re familiar with.