Make a choropleth map with leaflet for King County by zipcode for the year 2013 of the count of children age 19-35 months with incomplete recommended vaccinations.
Set knitr options for rendering of this document.
suppressMessages(library(knitr))
opts_chunk$set(tidy=FALSE, cache=TRUE, echo=TRUE, message=FALSE)
Use pacman to load the required packages, installing as needed.
# Load packages, installing as needed
my_preferred_repo <- 'http://cran.r-project.org'
if (!require("pacman")) {install.packages("pacman", repos = my_preferred_repo)}
pacman::p_load(knitr, htmlTable, httr, dplyr, zipcode, rgdal, rgeos, leaflet, tigris)
Get the health indicator data from King County.
# Get the King County Health Reform data
durl <- 'https://data.kingcounty.gov/api/views/ajpg-dges/rows.csv?accessType=DOWNLOAD'
dfile <- 'kchealth.csv'
res <- GET(durl, write_disk(dfile, overwrite=TRUE))
kchealth <- read.csv(dfile, stringsAsFactors = FALSE)
# Get the metadata to identify what the indicator codes are.
mdurl <- 'https://data.kingcounty.gov/api/views/8i4w-z3jh/rows.csv?accessType=DOWNLOAD'
mdfile <- 'kchealthmeta.csv'
res <- GET(mdurl, write_disk(mdfile, overwrite=TRUE))
kchealthmeta <- read.csv(mdfile, stringsAsFactors = FALSE)
Filter the metadata for the the child vaccination indicator.
# Filter the metadata for the indname "Child Vaccination"
kchealthmeta %>% filter(indname == 'Child vaccination') -> kchealthmeta.ind
Filter the data to include only the child vaccination indicator and the zip codes.
# Filter health data for child vaccination (%), indicator == 'childvac1935'
kchealth %>%
filter(indicator == 'childvac1935') %>%
select(zip=geo, childvac=estimate) -> kchealth.ind
Combine health indicator data with additional location information based on the zip code.
data(zipcode)
zipcode %>% inner_join(kchealth.ind, 'zip') -> kchealth.ind
Display the metadata for the dataset.
kable(kchealthmeta.ind[, 1:9])
| indicator | topic | year | yearagg | yearstr | datasrc | datatype | stattype | geo |
|---|---|---|---|---|---|---|---|---|
| childvac1935 | health | 2013 | 2013 | 2013 | iis | vs | percent | zip |
kable(kchealthmeta.ind[, 14])
| Children age 19-35 months with incomplete recommended vaccinations (4:3:1:3:3:1:4 series) |
Take a quick look at the data with some summary statistics.
# Apply a list of functions to a vector
sum.f <- function(X, r.names) {
FUN <- list(mean, sd, median, min, max)
fn <- c('mean', 'sd', 'median', 'min', 'max')
out <- sapply(FUN, function(func) func(X[complete.cases(X)]))
out.df <- as.data.frame(t(out))
rownames(out.df) <- r.names
setNames(out.df, fn)
}
# Make a data frame of summary statistics for childvac
sum.df <- round(sum.f(kchealth.ind$childvac, 'childvac'), 2)
# Render the table in HTML
htmlTable(sum.df, cgroup = 'Statistic', n.cgroup = 5,
caption = "Incomplete Child Vaccinations in King County",
css.cell = 'padding: 0px 10px 0px;')
| Incomplete Child Vaccinations in King County | |||||
| Statistic | |||||
|---|---|---|---|---|---|
| mean | sd | median | min | max | |
| childvac | 39.37 | 7.91 | 38.65 | 23.2 | 76.5 |
Find the maximum estimated incomplete vaccination percentage.
kchealth.ind %>% na.omit() %>%
summarize(max = max(childvac)) -> maxChildVacc
kchealth.ind %>% filter(childvac == maxChildVacc$max) -> kchealth.ind.max
kable(kchealth.ind.max)
| zip | city | state | latitude | longitude | childvac |
|---|---|---|---|---|---|
| 98070 | Vashon | WA | 47.4162 | -122.4682 | 76.5 |
Plot the dataset and annotate the point with the maximum y value.
# Draw the plot
plot(kchealth.ind[, c('zip', 'childvac')],
xlab = "Zip Code", ylab = "Estimated Percent (%)",
main = "Incomplete Child Vaccinations in King County by Zip Code")
# Annotate the point with the maximum y value
text(kchealth.ind.max$zip, kchealth.ind.max$childvac-4,
paste(kchealth.ind.max$city, " (", kchealth.ind.max$zip,"): ",
kchealth.ind.max$childvac, '%', sep=''))
Scatter plot of Incomplete Child Vaccinations
Get the coordinates for the zip code regions from US Census (2013) data.
# Get the zipfile for the US census zipcodes map
url <- "http://www2.census.gov/geo/tiger/GENZ2013/cb_2013_us_zcta510_500k.zip"
destname <- "uszip.zip"
if (! file.exists(destname)) download.file(url, destname)
# Extract the shapefile
filename <- "cb_2013_us_zcta510_500k.shp"
downloaddir <- getwd()
if (! file.exists(filename)) unzip(destname, exdir=downloaddir, junkpaths=TRUE)
filename <- gsub(".shp", "", filename)
# Read in shapefile (NAD83 coordinate system)
dat <- readOGR(downloaddir, filename)
## OGR data source with driver: ESRI Shapefile
## Source: "/home/high/Documents/leaflet-choropleth", layer: "cb_2013_us_zcta510_500k"
## with 33144 features
## It has 5 fields
Subset the data for the zip codes of interest and transform the data into the format needed to make the map.
# Subset for only those zipcodes in health reform dataset
dat.sub <- dat[dat$ZCTA5CE10 %in% unique(kchealth.ind$zip),]
# Transform to EPSG 4326 - WGS84 (required)
dat.trans <- spTransform(dat.sub, CRS("+init=epsg:4326"))
# Save the data slot
dat.data <- dat.trans@data[,c("GEOID10", "ZCTA5CE10")]
# Create a SpatialPolygonsDataFrame (needed to create geojson)
dat.spdf <- SpatialPolygonsDataFrame(dat.trans, data=dat.data)
Merge the health data into the map data by zip code.
# Merge health indicator data with map data
dat.merged <- geo_join(dat.spdf, kchealth.ind, "ZCTA5CE10", "zip")
Use leaflet to make an interactive choropleth map with popups for data values.
# Define the popup messages
popup <- paste0("<b>", "City: ", "</b>", dat.merged$city, "<br>",
"<b>", "Zip Code: ", "</b>", dat.merged$zip, "<br>",
"<b>", "Incomplete Vaccinations (% of children): ", "</b>",
dat.merged$childvac)
# Define a color palette
pal <- colorNumeric(
palette = "YlOrRd",
domain = dat.merged$childvac
)
# Make the map
map <- leaflet() %>%
addProviderTiles("CartoDB.Positron") %>%
addPolygons(data = dat.merged,
fillColor = ~pal(childvac),
color = "#b2aeae",
fillOpacity = 0.7,
weight = 1,
smoothFactor = 0.2,
popup = popup) %>%
addLegend(pal = pal,
values = dat.merged$childvac,
position = "bottomright",
title = "Incomplete Vaccinations",
labFormat = labelFormat(suffix = "%")) %>%
addPopups(kchealth.ind.max$longitude, kchealth.ind.max$latitude,
popup[dat.merged$zip == kchealth.ind.max$zip])
The map is embedded in the body of the HTML document and is zoomable, clickable, etc. Clicking on a colored region (i.e., zip code area) will open a popup with the zip code and percentage estimate for that zip code.
# Display the map
map
Data Sources: Data.KingCounty.gov, Census.gov, R zipcode package
The R Markdown source code is licensed under the CC0 1.0 Universal (CC0 1.0) Public Domain Dedication license.