Task

Make a choropleth map with leaflet for King County by zipcode for the year 2013 of the count of children age 19-35 months with incomplete recommended vaccinations.

Setup

Set Options

Set knitr options for rendering of this document.

suppressMessages(library(knitr))
opts_chunk$set(tidy=FALSE, cache=TRUE, echo=TRUE, message=FALSE)

Load Packages

Use pacman to load the required packages, installing as needed.

# Load packages, installing as needed
my_preferred_repo <- 'http://cran.r-project.org'
if (!require("pacman")) {install.packages("pacman", repos = my_preferred_repo)}
pacman::p_load(knitr, htmlTable, httr, dplyr, zipcode, rgdal, rgeos, leaflet, tigris)

Get Health Indicator Data

Get the health indicator data from King County.

# Get the King County Health Reform data
durl <- 'https://data.kingcounty.gov/api/views/ajpg-dges/rows.csv?accessType=DOWNLOAD'
dfile <- 'kchealth.csv'
res <- GET(durl, write_disk(dfile, overwrite=TRUE))
kchealth <- read.csv(dfile, stringsAsFactors = FALSE)

# Get the metadata to identify what the indicator codes are.
mdurl <- 'https://data.kingcounty.gov/api/views/8i4w-z3jh/rows.csv?accessType=DOWNLOAD'
mdfile <- 'kchealthmeta.csv'
res <- GET(mdurl, write_disk(mdfile, overwrite=TRUE))
kchealthmeta <- read.csv(mdfile, stringsAsFactors = FALSE)

Filter Data

Filter the metadata for the the child vaccination indicator.

# Filter the metadata for the indname "Child Vaccination"
kchealthmeta %>% filter(indname == 'Child vaccination') -> kchealthmeta.ind

Filter the data to include only the child vaccination indicator and the zip codes.

# Filter health data for child vaccination (%), indicator == 'childvac1935'
kchealth %>% 
    filter(indicator == 'childvac1935') %>% 
    select(zip=geo, childvac=estimate) -> kchealth.ind

Combine Zipcode Location Data

Combine health indicator data with additional location information based on the zip code.

data(zipcode)
zipcode %>% inner_join(kchealth.ind, 'zip') -> kchealth.ind

Summarize Data

View Metadata

Display the metadata for the dataset.

kable(kchealthmeta.ind[, 1:9])
indicator topic year yearagg yearstr datasrc datatype stattype geo
childvac1935 health 2013 2013 2013 iis vs percent zip
kable(kchealthmeta.ind[, 14])
Children age 19-35 months with incomplete recommended vaccinations (4:3:1:3:3:1:4 series)

Make a Summary Table

Take a quick look at the data with some summary statistics.

# Apply a list of functions to a vector
sum.f <- function(X, r.names) {
    FUN <- list(mean, sd, median, min, max)
    fn <- c('mean', 'sd', 'median', 'min', 'max')
    out <- sapply(FUN, function(func) func(X[complete.cases(X)]))
    out.df <- as.data.frame(t(out))
    rownames(out.df) <- r.names
    setNames(out.df, fn)
}

# Make a data frame of summary statistics for childvac
sum.df <- round(sum.f(kchealth.ind$childvac, 'childvac'), 2)
# Render the table in HTML
htmlTable(sum.df, cgroup = 'Statistic', n.cgroup = 5, 
          caption = "Incomplete Child Vaccinations in King County",
          css.cell = 'padding: 0px 10px 0px;')
Incomplete Child Vaccinations in King County
Statistic
mean sd median min max
childvac 39.37 7.91 38.65 23.2 76.5

Find Max Value

Find the maximum estimated incomplete vaccination percentage.

kchealth.ind %>% na.omit() %>% 
    summarize(max = max(childvac)) -> maxChildVacc
kchealth.ind %>% filter(childvac == maxChildVacc$max) -> kchealth.ind.max
kable(kchealth.ind.max)
zip city state latitude longitude childvac
98070 Vashon WA 47.4162 -122.4682 76.5

Make a Scatterplot

Plot the dataset and annotate the point with the maximum y value.

# Draw the plot
plot(kchealth.ind[, c('zip', 'childvac')], 
     xlab = "Zip Code", ylab = "Estimated Percent (%)", 
     main = "Incomplete Child Vaccinations in King County by Zip Code")

# Annotate the point with the maximum y value
text(kchealth.ind.max$zip, kchealth.ind.max$childvac-4, 
     paste(kchealth.ind.max$city, " (", kchealth.ind.max$zip,"): ", 
           kchealth.ind.max$childvac, '%', sep=''))
Scatter plot of Incomplete Child Vaccinations

Scatter plot of Incomplete Child Vaccinations

Get Spatial Data

Get the coordinates for the zip code regions from US Census (2013) data.

# Get the zipfile for the US census zipcodes map
url <- "http://www2.census.gov/geo/tiger/GENZ2013/cb_2013_us_zcta510_500k.zip"
destname <- "uszip.zip"
if (! file.exists(destname)) download.file(url, destname)

# Extract the shapefile
filename <- "cb_2013_us_zcta510_500k.shp"
downloaddir <- getwd()
if (! file.exists(filename)) unzip(destname, exdir=downloaddir, junkpaths=TRUE)
filename <- gsub(".shp", "", filename)

# Read in shapefile (NAD83 coordinate system)
dat <- readOGR(downloaddir, filename) 
## OGR data source with driver: ESRI Shapefile 
## Source: "/home/high/Documents/leaflet-choropleth", layer: "cb_2013_us_zcta510_500k"
## with 33144 features
## It has 5 fields

Prepare Spatial Data

Subset the data for the zip codes of interest and transform the data into the format needed to make the map.

# Subset for only those zipcodes in health reform dataset
dat.sub <- dat[dat$ZCTA5CE10 %in% unique(kchealth.ind$zip),]

# Transform to EPSG 4326 - WGS84 (required)
dat.trans <- spTransform(dat.sub, CRS("+init=epsg:4326"))

# Save the data slot
dat.data <- dat.trans@data[,c("GEOID10", "ZCTA5CE10")]

# Create a SpatialPolygonsDataFrame (needed to create geojson)
dat.spdf <- SpatialPolygonsDataFrame(dat.trans, data=dat.data)

Merge Health and Spatial Data

Merge the health data into the map data by zip code.

# Merge health indicator data with map data
dat.merged <- geo_join(dat.spdf, kchealth.ind, "ZCTA5CE10", "zip")

Make Choropleth Map

Use leaflet to make an interactive choropleth map with popups for data values.

# Define the popup messages
popup <- paste0("<b>", "City: ", "</b>", dat.merged$city, "<br>",
                "<b>", "Zip Code: ", "</b>", dat.merged$zip, "<br>",
                "<b>", "Incomplete Vaccinations (% of children): ", "</b>", 
                dat.merged$childvac)

# Define a color palette
pal <- colorNumeric(
    palette = "YlOrRd",
    domain = dat.merged$childvac
)

# Make the map
map <- leaflet() %>%
    addProviderTiles("CartoDB.Positron") %>%
    addPolygons(data = dat.merged, 
                fillColor = ~pal(childvac), 
                color = "#b2aeae",
                fillOpacity = 0.7, 
                weight = 1, 
                smoothFactor = 0.2,
                popup = popup) %>%
    addLegend(pal = pal, 
              values = dat.merged$childvac, 
              position = "bottomright", 
              title = "Incomplete Vaccinations",
              labFormat = labelFormat(suffix = "%")) %>%
    addPopups(kchealth.ind.max$longitude, kchealth.ind.max$latitude, 
              popup[dat.merged$zip == kchealth.ind.max$zip])

Display Map

The map is embedded in the body of the HTML document and is zoomable, clickable, etc. Clicking on a colored region (i.e., zip code area) will open a popup with the zip code and percentage estimate for that zip code.

# Display the map
map

Data Sources: Data.KingCounty.gov, Census.gov, R zipcode package

Source Code

License

The R Markdown source code is licensed under the CC0 1.0 Universal (CC0 1.0) Public Domain Dedication license.

References