library(tigris)
library(acs)
library(leaflet)
library(mapview)
library(ggplot2)
library(maptools)
library(stringr)
shapefile <- counties(state='MA', class='sp')
## Warning in proj4string(obj): CRS object has comment, which is lost in output
plot(shapefile)
head(shapefile)
## class : SpatialPolygonsDataFrame
## features : 6
## extent : -73.06851, -69.85886, 41.18705, 42.72161 (xmin, xmax, ymin, ymax)
## Warning in proj4string(x): CRS object has comment, which is lost in output
## crs : +proj=longlat +datum=NAD83 +no_defs
## variables : 17
## names : STATEFP, COUNTYFP, COUNTYNS, GEOID, NAME, NAMELSAD, LSAD, CLASSFP, MTFCC, CSAFP, CBSAFP, METDIVFP, FUNCSTAT, ALAND, AWATER, ...
## min values : 25, 001, 00606927, 25001, Barnstable, Barnstable County, 06, H1, G4020, 148, 12700, 14454, A, 1021019039, 1004283854, ...
## max values : 25, 027, 00606940, 25027, Worcester, Worcester County, 06, H4, G4020, 148, 49340, 14454, N, 3912554985, 666897066, ...
plot(shapefile)
pointLabel(coordinates(shapefile),labels=shapefile$COUNTYFP)
countylist <- c('17','21','25', '09')
shapefile <- tracts(state='MA', county=countylist, year=2018, class='sp')
##
|
| | 0%
|
|= | 1%
|
|= | 2%
|
|== | 2%
|
|== | 3%
|
|== | 4%
|
|=== | 4%
|
|=== | 5%
|
|==== | 5%
|
|==== | 6%
|
|===== | 7%
|
|===== | 8%
|
|====== | 8%
|
|====== | 9%
|
|======= | 9%
|
|======= | 10%
|
|======== | 11%
|
|======== | 12%
|
|========= | 12%
|
|========= | 13%
|
|========== | 14%
|
|========== | 15%
|
|=========== | 15%
|
|=========== | 16%
|
|============ | 17%
|
|============ | 18%
|
|============= | 18%
|
|============= | 19%
|
|============== | 19%
|
|============== | 20%
|
|============== | 21%
|
|=============== | 21%
|
|=============== | 22%
|
|================ | 23%
|
|================ | 24%
|
|================= | 24%
|
|================= | 25%
|
|================== | 25%
|
|================== | 26%
|
|=================== | 27%
|
|=================== | 28%
|
|==================== | 28%
|
|==================== | 29%
|
|===================== | 29%
|
|===================== | 30%
|
|====================== | 31%
|
|====================== | 32%
|
|======================= | 32%
|
|======================= | 33%
|
|======================== | 34%
|
|======================== | 35%
|
|========================= | 35%
|
|========================= | 36%
|
|========================== | 37%
|
|========================== | 38%
|
|=========================== | 38%
|
|=========================== | 39%
|
|============================ | 39%
|
|============================ | 40%
|
|============================ | 41%
|
|============================= | 41%
|
|============================= | 42%
|
|============================== | 42%
|
|============================== | 43%
|
|=============================== | 44%
|
|=============================== | 45%
|
|================================ | 45%
|
|================================ | 46%
|
|================================= | 47%
|
|================================= | 48%
|
|================================== | 48%
|
|================================== | 49%
|
|=================================== | 49%
|
|=================================== | 50%
|
|=================================== | 51%
|
|==================================== | 51%
|
|==================================== | 52%
|
|===================================== | 52%
|
|===================================== | 53%
|
|===================================== | 54%
|
|====================================== | 54%
|
|====================================== | 55%
|
|======================================= | 55%
|
|======================================= | 56%
|
|======================================== | 57%
|
|======================================== | 58%
|
|========================================= | 58%
|
|========================================= | 59%
|
|========================================== | 59%
|
|========================================== | 60%
|
|=========================================== | 61%
|
|=========================================== | 62%
|
|============================================ | 62%
|
|============================================ | 63%
|
|============================================= | 64%
|
|============================================= | 65%
|
|============================================== | 65%
|
|============================================== | 66%
|
|=============================================== | 67%
|
|=============================================== | 68%
|
|================================================ | 68%
|
|================================================ | 69%
|
|================================================= | 69%
|
|================================================= | 70%
|
|================================================= | 71%
|
|================================================== | 71%
|
|================================================== | 72%
|
|=================================================== | 72%
|
|=================================================== | 73%
|
|=================================================== | 74%
|
|==================================================== | 74%
|
|==================================================== | 75%
|
|===================================================== | 75%
|
|===================================================== | 76%
|
|====================================================== | 77%
|
|======================================================= | 78%
|
|======================================================= | 79%
|
|======================================================== | 79%
|
|======================================================== | 80%
|
|======================================================== | 81%
|
|========================================================= | 81%
|
|========================================================= | 82%
|
|========================================================== | 82%
|
|========================================================== | 83%
|
|=========================================================== | 84%
|
|=========================================================== | 85%
|
|============================================================ | 85%
|
|============================================================ | 86%
|
|============================================================= | 87%
|
|============================================================= | 88%
|
|============================================================== | 88%
|
|============================================================== | 89%
|
|=============================================================== | 89%
|
|=============================================================== | 90%
|
|=============================================================== | 91%
|
|================================================================ | 91%
|
|================================================================ | 92%
|
|================================================================= | 93%
|
|================================================================== | 94%
|
|================================================================== | 95%
|
|=================================================================== | 95%
|
|=================================================================== | 96%
|
|==================================================================== | 96%
|
|==================================================================== | 97%
|
|===================================================================== | 98%
|
|===================================================================== | 99%
|
|======================================================================| 99%
|
|======================================================================| 100%
## Warning in proj4string(obj): CRS object has comment, which is lost in output
plot(shapefile)
# Span
myspan <- 5
# End year
myendyear <- 2018
# Which table
mytable <- acs.lookup(endyear=2018, table.number="B08301")
## Warning in acs.lookup(endyear = 2018, table.number = "B08301"): acs.lookup for endyear>2015: using 2015 variable codes to access 2018 data.
## (See ?acs.lookup for details)
## Warning in acs.lookup(endyear = 2018, table.number = "B08301"): temporarily downloading and using archived XML variable lookup files;
## since this is *much* slower, recommend running
## acs.tables.install()
# Limit the geography
# Convert the list of counties that is in text form to numeric form
countylist2 <- as.numeric(countylist)
# Define the geography to limit the data import
mygeo <- geo.make(state=25, county=countylist2, tract="*")
** Choose the data columns of interest from the table **
Census tables usually have many “columns” of data. We can make a list of all the columns.
results(mytable)$variable.name
## [1] "Total:"
## [2] "Car, truck, or van:"
## [3] "Car, truck, or van: Drove alone"
## [4] "Car, truck, or van: Carpooled:"
## [5] "Car, truck, or van: Carpooled: In 2-person carpool"
## [6] "Car, truck, or van: Carpooled: In 3-person carpool"
## [7] "Car, truck, or van: Carpooled: In 4-person carpool"
## [8] "Car, truck, or van: Carpooled: In 5- or 6-person carpool"
## [9] "Car, truck, or van: Carpooled: In 7-or-more-person carpool"
## [10] "Public transportation (excluding taxicab):"
## [11] "Public transportation (excluding taxicab): Bus or trolley bus"
## [12] "Public transportation (excluding taxicab): Streetcar or trolley car (carro publico in Puerto Rico)"
## [13] "Public transportation (excluding taxicab): Subway or elevated"
## [14] "Public transportation (excluding taxicab): Railroad"
## [15] "Public transportation (excluding taxicab): Ferryboat"
## [16] "Taxicab"
## [17] "Motorcycle"
## [18] "Bicycle"
## [19] "Walked"
## [20] "Other means"
## [21] "Worked at home"
We want column #1 which gives the absolute total and also column #10 which gives the total commute by Public transportation. Later we will calculate the percent. We indicate that we want those two columns by putting them into the variable myvars.
myvars <- mytable[1]+mytable[18]
mydata <- acs.fetch(endyear=myendyear, span=myspan, geography=mygeo, variable=myvars)
mystate <- as.character(mydata@geography$state)
mycounty <- as.character(mydata@geography$county)
# add leading zeros to make the total number of characters 3
mycountypadded <- str_pad(mycounty, 3, pad="0")
mytract <- as.character(mydata@geography$tract)
acsgeoid <- paste0(mystate,mycountypadded,mytract)
# Put the dataset into a dataframe format
mydatadf <- data.frame(acsgeoid, mydata@estimate)
# Take a look at the first 6 rows of data
head(mydatadf)
## acsgeoid B08301_001
## Census Tract 3525, Middlesex County, Massachusetts 25017352500 2249
## Census Tract 3527, Middlesex County, Massachusetts 25017352700 1204
## Census Tract 3534, Middlesex County, Massachusetts 25017353400 2000
## Census Tract 3566.01, Middlesex County, Massachusetts 25017356601 2414
## Census Tract 3571, Middlesex County, Massachusetts 25017357100 2057
## Census Tract 3577, Middlesex County, Massachusetts 25017357700 1731
## B08301_018
## Census Tract 3525, Middlesex County, Massachusetts 248
## Census Tract 3527, Middlesex County, Massachusetts 89
## Census Tract 3534, Middlesex County, Massachusetts 218
## Census Tract 3566.01, Middlesex County, Massachusetts 24
## Census Tract 3571, Middlesex County, Massachusetts 32
## Census Tract 3577, Middlesex County, Massachusetts 53
# Rename column names
colnames(mydatadf)=c("GEOID", "total", "bicycle")
# Take a look at the first 6 rows of data
head(mydatadf)
## GEOID total bicycle
## Census Tract 3525, Middlesex County, Massachusetts 25017352500 2249 248
## Census Tract 3527, Middlesex County, Massachusetts 25017352700 1204 89
## Census Tract 3534, Middlesex County, Massachusetts 25017353400 2000 218
## Census Tract 3566.01, Middlesex County, Massachusetts 25017356601 2414 24
## Census Tract 3571, Middlesex County, Massachusetts 25017357100 2057 32
## Census Tract 3577, Middlesex County, Massachusetts 25017357700 1731 53
# Create new column and calculate the percent commute trips by transit
mydatadf$bicyclepercent <- mydatadf$bicycle/mydatadf$total*100
# Merge the ACS data with the shapefile
mydatamerged <- geo_join(shapefile, mydatadf, "GEOID", "GEOID")
# There is some weirdness with some tracts showing up as 100%, so eliminate those by subsetting the results to excluse them.
mydatamerged <- subset(mydatamerged, mydatamerged@data$bicyclepercent<100)
mypopup <- paste0("GEOID: ", mydatamerged$GEOID, "<br>", "Commute by Bicycle: ", round(mydatamerged$bicyclepercent,1), "%")
mypal <- colorNumeric(
palette = "YlGnBu",
domain = NULL
)
# Store values into variables
myLAT <- 42.366740
myLNG <- -71.084760
myZOOM <- 12
myTILES <- "CartoDB.Positron"
mymap <- leaflet() %>%
addProviderTiles(myTILES) %>%
setView(myLNG, myLAT, zoom = myZOOM) %>%
addPolygons(data = mydatamerged,
fillColor = ~mypal(bicyclepercent), # this makes the fill color on the spectrum of the palette
color = "#b2aeae", # this is the color of the outline of the shapes
fillOpacity = 0.7, # how see through the shapes are
weight = 1,
smoothFactor = 0.2,
popup = mypopup) %>%
addLegend(pal = mypal,
values = mydatamerged$bicyclepercent,
position = "bottomright",
title = "Commute by Bicycle",
labFormat = labelFormat(suffix = "%"),
bins=5)
mymap