library(tidyverse)
library(sf)
library(raster)
library(tmap)
library(tidycensus)
library(tigris)
library(lehdr)
library(httr)
library(jsonlite)
library(viridis)
library(ggpubr)
# insert your Google Places API key to your environment
census_api_key("YOUR API GOES HERE", install = TRUE, overwrite = TRUE)
readRenviron("~/.Renviron") # or, Restart R
# specify the folder to save tiger/line shapefiles
tigris_cache_dir(paste0(getwd(), "/tigris"))
readRenviron('~/.Renviron') # or, Restart R
Step 1. Import an official BeltLine shapefile as an sf object: line
getwd() # put the shapefile in your working directory
beltline <- st_read("Export_Output.shp")
class(beltline)
east_trail <- beltline %>%
filter(Status == "Completed", Name == "Eastside Trail")
Step 4. Read in the 2006-2010 & 2013-2017 ACS 5 year estimates at the tract level in Fulton County
tract_fulton <- tracts(state = "13", county = "121", cb = TRUE) %>% st_as_sf %>% dplyr::select(GEOID)
ses2010 <- get_acs(state = "13", county = "Fulton", geography = "tract", year = 2010,
variables = c("B01001_001", "B01001H_001",
"B15002_001", "B15002_015", "B15002_016", "B15002_017", "B15002_018",
"B15002_032", "B15002_033", "B15002_034", "B15002_035",
"B19013_001"), output = "wide", geometry = FALSE)
ses2010b <- ses2010 %>%
mutate(pct_nhw = B01001H_001E/B01001_001E*100,
pct_coll = (B15002_015E + B15002_016E + B15002_017E + B15002_018E +
B15002_032E + B15002_033E + B15002_034E + B15002_035E)/B15002_001E*100,
ln_medinc = log(B19013_001E)
) %>%
mutate(pct_nhw2010 = scale(pct_nhw)[, 1],
pct_coll2010 = scale(pct_coll)[, 1],
ln_medinc2010 = scale(ln_medinc)[, 1],
ses2010 = (pct_nhw2010 + pct_coll2010 + ln_medinc2010)/3) %>%
dplyr::select(GEOID, pct_nhw2010, pct_coll2010, ln_medinc2010, ses2010)
ses2017 <- get_acs(state = "13", county = "Fulton", geography = "tract", year = 2017,
variables = c("B01001_001", "B01001H_001",
"B15002_001", "B15002_015", "B15002_016", "B15002_017", "B15002_018",
"B15002_032", "B15002_033", "B15002_034", "B15002_035",
"B19013_001"), output = "wide", geometry = FALSE)
ses2017b <- ses2017 %>%
mutate(pct_nhw = B01001H_001E/B01001_001E*100,
pct_coll = (B15002_015E + B15002_016E + B15002_017E + B15002_018E +
B15002_032E + B15002_033E + B15002_034E + B15002_035E)/B15002_001E*100,
ln_medinc = log(B19013_001E)
) %>%
mutate(pct_nhw2017 = scale(pct_nhw)[, 1],
pct_coll2017 = scale(pct_coll)[, 1],
ln_medinc2017 = scale(ln_medinc)[, 1],
ses2017 = (pct_nhw2017 + pct_coll2017 + ln_medinc2017)/3) %>%
dplyr::select(GEOID, pct_nhw2017, pct_coll2017, ln_medinc2017, ses2017)
tract_ses <- tract_fulton %>%
left_join(ses2010b, by = "GEOID") %>%
left_join(ses2017b, by = "GEOID") %>%
dplyr::select(GEOID, ses2010, ses2017)
Step 7. Compare patterns of the SES index between 2006-2010 and 2013-2017
st_crs(east_trail)
st_crs(tract_ses)
tract_ses_P <- st_transform(tract_ses, 3857)
st_crs(tract_ses_P)
distances_tr <- st_distance(tract_ses_P, east_trail)
tract_ses_P$dist <- distances_tr[, 1]
ggplot(tract_ses_P) +
geom_point(aes(x = as.numeric(dist), y = ses2010), color = "grey") +
geom_smooth(aes(x = as.numeric(dist), y = ses2010), color = "grey", se = FALSE) +
geom_point(aes(x = as.numeric(dist), y = ses2017)) +
geom_smooth(aes(x = as.numeric(dist), y = ses2017), se = FALSE)
ggplot(tract_ses_P)+
geom_sf(aes(fill = as.numeric(dist)), colour = "white")
Step 1. Download tod_database_download.csv on https://toddata.cnt.org/downloads.php.
stations_df <- read_csv("tod_database_download.csv")
stations_sf <- st_as_sf(stations_df, coords = c("Longitude", "Latitude"), crs = 4326)
Step 7 Compute the same summary statistics for the entire country with the same data sets.
stations_sf <- stations_sf %>%
filter(Buffer == "Existing Transit") %>%
st_transform(2163) # projection that preserves areas for the contiguous US
bf_all <- st_buffer(stations_sf, dist = 800)
counties <- counties() %>% st_as_sf() %>% st_transform(2163) # all US counties - 3233
counties_tr <- counties[bf_all, ] # US counties overlapped by any transit station buffer - 191
counties_tr_df <- counties_tr %>%
mutate(FIPS = paste0(STATEFP, COUNTYFP)) %>%
arrange(FIPS) %>%
dplyr::select(FIPS)
# nrow(counties_tr_df) == n_distinct(counties_tr_df) # check any duplicate
counties_tr_list <- counties_tr_df$FIPS # create a vector
get_wrk_cnt <- function(x){
tryCatch(
{get_acs(state = substr(x, 1, 2), county = substr(x, 3, 5),
geography = "block group", year = 2017,
variables = c("B08301_001", "B08301_010", "B08301_018", "B08301_019"),
output = "wide", geometry = TRUE)
},
error = function(cond){
message("Here's the original error message:")
message(cond)
return(NA)
}
)
}
get_wrk_cnt only to those counties that overlap any buffer.# a <- Sys.time() # start time
output <- map(counties_tr_list, get_wrk_cnt)
# b <- Sys.time() # end time
# b - a # processing time: 7.708619 mins
## check the output & save for later use
map_lgl(output, function(x) class(x)[1] %in% c("sf", "data.frame"))
map_lgl(output, ~class(.)[1] %in% c("sf", "data.frame")) # this does the same job as the above line.
object.size(output) # about 120 mb
map_dbl(output, nrow) %>% sum() # total number of blkgroups
write_rds(output, "wrk_cnt_blkgrp2017.rds") # save it as an R data set
output <- read_rds("wrk_cnt_blkgrp2017.rds") # read in as an R data set
output is an sf object containing all block groups for each county.## combining lists of sf objects to single dataframe: https://github.com/r-spatial/sf/issues/798
## FYI, do.call https://www.rdocumentation.org/packages/base/versions/3.5.3/topics/do.call
# a <- Sys.time() # start time
output_bound <- do.call(what = sf:::rbind.sf, args = output) %>%
st_transform(crs = 2163)
# b <- Sys.time() # end time
# b - a # processing time: 37.85005 secs
object.size(output_bound) # about 120 mb, a little smaller than ouput
plot(st_geometry(output_bound)) # this takes time to process
write_rds(output_bound, "wrk_cnt_blkgrp2017_bound.rds") # save it as an R data set
output_bound <- read_rds("wrk_cnt_blkgrp2017_bound.rds") # read in as an R data set
blkgrp_all <- output_bound[bf_all, ]
blkgrp_all$area_whole <- blkgrp_all %>% st_area() %>% unclass()
seg_all <- st_intersection(bf_all, blkgrp_all)
seg_all$area_seg <- seg_all %>% st_area() %>% unclass()
seg_all$area_prop <- seg_all$area_seg / seg_all$area_whole
union_all <-
seg_all %>%
group_by(Station.Name) %>%
mutate(workers = B08301_001E * area_prop,
workers_tr = B08301_010E * area_prop,
workers_bk = B08301_018E * area_prop,
workers_wk = B08301_019E * area_prop
) %>%
summarize(n_seg = n(),
workers_sum = sum(workers),
workers_sum_tr = sum(workers_tr),
workers_sum_bk = sum(workers_bk),
workers_sum_wk = sum(workers_wk),
workers_pct_tr = workers_sum_tr/workers_sum*100,
workers_pct_active = (workers_sum_bk + workers_sum_bk)/workers_sum*100) %>%
arrange(desc(n_seg))
Step 1 Create xy points for API calls.
sf to use tidyverse verbsua00 <- urban_areas(cb = FALSE, year = 2017) # https://cran.r-project.org/web/packages/tigris/tigris.pdf
ua01 <- ua00 %>% st_as_sf()
boston_ua <- ua01 %>%
filter(grepl("Boston, MA", NAME10)) %>%
st_transform(crs = 2163) # projection that preserves areas for the contiguous US
sanf_ua <- ua01 %>%
filter(grepl("San Francisco", NAME10)) %>%
st_transform(crs = 2163) # projection that preserves areas for the contiguous US
800 metersboston_raster_template <-
raster(extent(boston_ua),
resolution = 800,
crs = st_crs(boston_ua)$proj4string) # http://strimas.com/spatial/hexagonal-grids/
boston_r <-
rasterize(boston_ua, boston_raster_template, field = 1)
sanf_raster_template <-
raster(extent(sanf_ua),
resolution = 800,
crs = st_crs(sanf_ua)$proj4string) # http://strimas.com/spatial/hexagonal-grids/
sanf_r <-
rasterize(sanf_ua, sanf_raster_template, field = 1)
boston_fishnet <- rasterToPolygons(boston_r) %>%
st_as_sf()
sanf_fishnet <- rasterToPolygons(sanf_r) %>%
st_as_sf()
boston_fishnet_cent <- st_centroid(boston_fishnet)
sanf_fishnet_cent <- st_centroid(sanf_fishnet)
# number of jobs for individual tracts in MA
ma_job <- grab_lodes(state = "MA", year = 2015, lodes_type = "wac",
job_type = "JT00", segment = "S000", agg_geo = "tract",
download_dir = file.path(getwd(), "lodes_raw")) # specify the folder to download
# tracts in Boston UA
ma_tr <- tracts(state = "MA", cb = TRUE)
ma_tr_sf <- ma_tr %>% st_as_sf() %>% st_transform(crs=2163)
boston_tr_sf <- ma_tr_sf[boston_ua, ]
# calculate job density for individual tracts
# extract the xy coordinates of the tract with the hightest density
boston_tr_sf %>%
left_join(ma_job, by = c("GEOID" = "w_tract")) %>%
mutate(ALAND2 = as.numeric(ALAND),
jobden = C000/ALAND2, 0) %>%
arrange(desc(jobden)) %>%
top_n(jobden, n=1) %>%
dplyr::select(GEOID) %>%
st_centroid()
# number of jobs for individual tracts in CA
ca_job <- grab_lodes(state = "CA", year = 2015, lodes_type = "wac",
job_type = "JT00", segment = "S000", agg_geo = "tract",
download_dir = file.path(getwd(), "lodes_raw")) # specify the folder to download
# tracts in San Francisco UA
ca_tr <- tracts(state = "CA", cb = TRUE)
ca_tr_sf <- ca_tr %>% st_as_sf() %>% st_transform(crs=2163)
sanf_tr_sf <- ca_tr_sf[sanf_ua, ]
# calculate job density for individual tracts
# extract the xy coordinates of the tract with the hightest density
sanf_tr_sf %>%
left_join(ca_job, by = c("GEOID" = "w_tract")) %>%
mutate(ALAND2 = as.numeric(ALAND),
jobden = C000/ALAND2, 0) %>%
arrange(desc(jobden)) %>%
top_n(jobden, n=1) %>%
dplyr::select(GEOID) %>%
st_centroid()
# For Boston
boston_cbd_point <- c(2317225, 124512.1) %>% st_point()
boston_cbd <- st_sfc(boston_cbd_point, crs = 2163)
boston_cbd_bf <- boston_cbd %>% st_buffer(dist = 8046.72) # create a 5-mile buffer
boston_pts <- boston_fishnet_cent[boston_cbd_bf, ]
tmap_mode("view")
tm_basemap("OpenStreetMap.Mapnik") +
tm_shape(boston_pts) +
tm_dots()
# For San Francisco
sanf_cbd_point <- c(-1943352, -537613.2) %>% st_point()
sanf_cbd <- st_sfc(sanf_cbd_point, crs = 2163)
sanf_cbd_bf <- sanf_cbd %>% st_buffer(dist = 8046.72) # create a 5-mile buffer
sanf_pts <- sanf_fishnet_cent[sanf_cbd_bf, ]
tmap_mode("view")
tm_basemap("OpenStreetMap.Mapnik") +
tm_shape(sanf_pts) +
tm_dots()
crs=4326 and extract xy coordinatesboston_pts_wgs84 <-
boston_pts %>%
st_transform(crs = 4326) %>%
st_coordinates() %>%
as.data.frame()
sanf_pts_wgs84 <-
sanf_pts %>%
st_transform(crs = 4326) %>%
st_coordinates() %>%
as.data.frame()
Step 2 Send Google Places API calls.
boston_lat_lng <- paste(boston_pts_wgs84$Y, boston_pts_wgs84$X, sep=",")
sanf_lat_lng <- paste(sanf_pts_wgs84$Y, sanf_pts_wgs84$X, sep=",")
url_1 <- "https://maps.googleapis.com/maps/api/place/nearbysearch/json?location="
url_2 <- "&rankby=distance&type="
business <- "cafe"
my_api <- "YOUR API KEY GOES HERE"
token <- "&pagetoken="
google_places <- function(lat_lng){
# read in output from Google
result1_raw <<-
paste0(url_1, lat_lng, url_2, business, "&key=", my_api) %>%
GET() %>%
content(as = "text") %>%
fromJSON()
# transform to a data frame
if (result1_raw$status == "OK"){
result1_df <<-
result1_raw$results %>%
dplyr::select(name, place_id, types, rating, user_ratings_total)
result1_df$x <<- result1_raw$results$geometry$location$lng
result1_df$y <<- result1_raw$results$geometry$location$lat
# result1_df$geometry <- NULL
} else {
result1_df <<- NA
}
print("Result #1 is just finished.")
Sys.sleep(2)
# run if results have more than 20 businesses
if (is.null(result1_raw$next_page_token) == FALSE){
# read in output from Google
result2_raw <<-
paste0(url_1, lat_lng, url_2, business, "&key=", my_api, token, result1_raw$next_page_token) %>%
GET() %>%
content(as = "text") %>%
fromJSON()
# transform to a data frame
if (result2_raw$status == "OK"){
result2_df <<-
result2_raw$results %>%
dplyr::select(name, place_id, types, rating, user_ratings_total)
result2_df$x <<- result2_raw$results$geometry$location$lng
result2_df$y <<- result2_raw$results$geometry$location$lat
# result2_df$geometry <- NULL
} else {
result2_df <<- NA
}
} else {
result2_df <<- NA
}
print("Result #2 is just finished.")
Sys.sleep(2)
# run if results have more than 40 businesses
if (is.null(result2_raw$next_page_token) == FALSE){
# read in output from Google
result3_raw <<-
paste0(url_1, lat_lng, url_2, business, "&key=", my_api, token, result2_raw$next_page_token) %>%
GET() %>%
content(as = "text") %>%
fromJSON()
# transform to a data frame
if (result3_raw$status == "OK"){
result3_df <<-
result3_raw$results %>%
dplyr::select(name, place_id, types, rating, user_ratings_total)
result3_df$x <<- result3_raw$results$geometry$location$lng
result3_df$y <<- result3_raw$results$geometry$location$lat
# result3_df$geometry <- NULL
} else {
result3_df <<- NA
}
} else {
result3_df <<- NA
}
print("Result #3 is just finished.")
result_invalid <<- list(result1_raw$status, result2_raw$status, result3_raw$status) %>%
map_lgl(~.=="INVALID_REQUEST")
if (any(result_invalid)){
print("INVALID_REQUEST")
return("INVALID_REQUEST")
} else {
result_list <<- list(result1_df, result2_df, result3_df)
result_final <<- result_list[!is.na(result_list)] %>%
dplyr::bind_rows()
print(paste0("Succesful collection for ", as.character(i), "th point"))
return(result_final)
}
Sys.sleep(2)
}
df object to an sf objectboston_output <- vector("list", length(boston_lat_lng))
for (i in seq_along(boston_lat_lng)) {
boston_output[[i]] <- google_places(boston_lat_lng[[i]])
}
map(boston_output, ~class(.)!="data.frame") %>% unlist() %>% sum()
map_dbl(boston_output, nrow) %>% unlist() %>% sum()/60
boston_output_df <- dplyr::bind_rows(boston_output) %>% as_tibble()
boston_output_df2 <-
boston_output_df %>%
group_by(place_id) %>%
summarize(rating = mean(rating, na.rm = TRUE),
rating_n = mean(user_ratings_total, na.rm = TRUE),
x = mean(x),
y = mean(y))
boston_output_sf <- st_as_sf(boston_output_df2, coords = c("x", "y"), crs = 4326)
boston_output_sf$ln_rating_n <- log(boston_output_sf$rating_n +1)
tmap_mode("view")
tm_basemap("Stamen.TonerLite", alpha = 0.25) +
tm_shape(boston_output_sf) +
tm_dots(col = "ln_rating_n", size = 0.05, palette = "-viridis") +
tm_shape(boston_pts) +
tm_dots(col = "red", size = 0.01, alpha = 0.25) +
tm_shape(boston_cbd_bf) +
tm_borders(col = "red") +
tm_tiles("Stamen.TonerLabels")
sanf_output <- vector("list", length(sanf_lat_lng))
for (i in seq_along(sanf_lat_lng)) {
sanf_output[[i]] <- google_places(sanf_lat_lng[[i]])
}
map(sanf_output, ~class(.)!="data.frame") %>% unlist() %>% sum()
map_dbl(sanf_output, nrow) %>% unlist() %>% sum()/60
sanf_output_df <- dplyr::bind_rows(sanf_output) %>% as_tibble()
sanf_output_df2 <-
sanf_output_df %>%
group_by(place_id) %>%
summarize(rating = mean(rating, na.rm = TRUE),
rating_n = mean(user_ratings_total, na.rm = TRUE),
x = mean(x),
y = mean(y))
sanf_output_sf <- st_as_sf(sanf_output_df2, coords = c("x", "y"), crs = 4326)
sanf_output_sf$ln_rating_n <- log(sanf_output_sf$rating_n +1)
tmap_mode("view")
tm_basemap("Stamen.TonerLite", alpha = 0.25) +
tm_shape(sanf_output_sf) +
tm_dots(col = "ln_rating_n", size = 0.05, palette = "-viridis") +
tm_shape(sanf_pts) +
tm_dots(col = "red", size = 0.01, alpha = 0.25) +
tm_shape(sanf_cbd_bf) +
tm_borders(col = "red") +
tm_tiles("Stamen.TonerLabels")
Step 3 Compute cafe density for individual tracts
boston_den_cafe <- boston_output_sf %>%
st_transform(crs = 2163) %>%
st_join(boston_tr_sf[boston_cbd_bf, ]) %>% # Boston tracts within five miles from the CBD
filter(is.na(GEOID) == FALSE) %>% # drop census tracts with no cafe
mutate(
sqmt = as.numeric(ALAND), # ALAND is characer
sqmi = sqmt/1000000*0.386102159 # convert from sq meter to sq mile
) %>%
group_by(GEOID) %>%
summarize(
n = n(),
sqmi_m = mean(sqmi),
den_cafe = n/sqmi_m
) %>%
dplyr::select(GEOID, den_cafe)
sanf_den_cafe <- sanf_output_sf %>%
st_transform(crs = 2163) %>%
st_join(sanf_tr_sf[sanf_cbd_bf, ]) %>% # Boston tracts within five miles from the CBD
filter(is.na(GEOID) == FALSE) %>% # drop census tracts with no cafe
mutate(
sqmt = as.numeric(ALAND), # ALAND is characer
sqmi = sqmt/1000000*0.386102159 # convert from sq meter to sq mile
) %>%
group_by(GEOID) %>%
summarize(
n = n(),
sqmi_m = mean(sqmi),
den_cafe = n/sqmi_m
) %>%
dplyr::select(GEOID, den_cafe)
Step 4 Collect an alternative measure
# select only retail jobs
ma_retail <- ma_job %>%
dplyr::select(GEOID = w_tract, retail = CNS07)
# compute retail density for individual tracts
boston_den_retail_sf <-
boston_tr_sf[boston_cbd_bf, ] %>%
left_join(ma_retail) %>%
mutate(
sqmt = as.numeric(ALAND),
sqmi = sqmt/1000000*0.386102159, # convert from sq meter to sq mile
den_retail = retail/sqmi
) %>%
dplyr::select(GEOID, den_retail)
tmap_mode("view")
tm_shape(boston_cbd_bf) + # the first two lines do not add any line, but set the geographic extend
tm_borders(lwd = 0) +
tm_shape(boston_den_retail_sf) +
tm_polygons(border.col = "white", lwd = 1,
col = "den_retail", style = "quantile", n = 10, alpha = 0.5) +
tm_shape(boston_cbd_bf) +
tm_borders(col = "black", alpha = 0.5)
# select only retail jobs
ca_retail <- ca_job %>%
dplyr::select(GEOID = w_tract, retail = CNS07)
# compute retail density for individual tracts
sanf_den_retail_sf <-
sanf_tr_sf[sanf_cbd_bf, ] %>%
left_join(ca_retail) %>%
mutate(
sqmt = as.numeric(ALAND),
sqmi = sqmt/1000000*0.386102159, # convert from sq meter to sq mile
den_retail = retail/sqmi
) %>%
dplyr::select(GEOID, den_retail)
tmap_mode("view")
tm_shape(sanf_cbd_bf) + # the first two lines do not add any line, but set the geographic extend
tm_borders(lwd = 0) +
tm_shape(sanf_den_retail_sf) +
tm_polygons(border.col = "white", lwd = 1,
col = "den_retail", style = "quantile", n = 10, alpha = 0.5) +
tm_shape(sanf_cbd_bf) +
tm_borders(col = "black", alpha = 0.5)
Step 5 Compute college graduates 25-34 as of 2017
# var_acs17 <- load_variables(2017, "acs5", cache = TRUE)
# View(var_acs17)
# B15001_017: Male!!25 to 34 years!!Bachelor's degree
# B15001_018: Male!!25 to 34 years!!Graduate or professional degree
# B15001_058: Female!!25 to 34 years!!Bachelor's degree
# B15001_059: Female!!25 to 34 years!!Graduate or professional degree
ma_young <- get_acs(state = "MA", geography = "tract", year = 2017,
variables = c("B15001_017", "B15001_018", "B15001_058", "B15001_059"),
output = "wide")
ma_young2 <- ma_young %>%
mutate(young = B15001_017E + B15001_018E + B15001_058E + B15001_059E) %>%
dplyr::select(GEOID, young)
boston_cbd_young <- boston_tr_sf[boston_cbd_bf, ] %>%
left_join(ma_young2) %>%
mutate(
sum_young = sum(young),
pct_young = young/sum_young*100
) %>%
dplyr::select(GEOID, pct_young)
tmap_mode("view")
tm_basemap("Stamen.TonerLite", alpha = 0.25) +
tm_shape(boston_cbd_bf) + # the first two lines do not add any line, but set the geographic extend
tm_borders(lwd = 0) +
tm_shape(boston_cbd_young) +
tm_polygons(border.col = "white", lwd = 1,
col = "pct_young", style = "quantile", n = 5, alpha = 0.5) +
tm_shape(boston_output_sf) +
tm_dots(col = "blue", size = 0.03, alpha = 0.25) +
tm_shape(boston_cbd_bf) +
tm_borders(col = "black", alpha = 0.25)
ca_young <- get_acs(state = "CA", geography = "tract", year = 2017,
variables = c("B15001_017", "B15001_018", "B15001_058", "B15001_059"),
output = "wide")
ca_young2 <- ca_young %>%
mutate(young = B15001_017E + B15001_018E + B15001_058E + B15001_059E) %>%
dplyr::select(GEOID, young)
sanf_cbd_young <- sanf_tr_sf[sanf_cbd_bf, ] %>%
left_join(ca_young2) %>%
mutate(
sum_young = sum(young),
pct_young = young/sum_young*100
) %>%
dplyr::select(GEOID, pct_young)
tmap_mode("view")
tm_basemap("Stamen.TonerLite", alpha = 0.25) +
tm_shape(sanf_cbd_bf) + # the first two lines do not add any line, but set the geographic extend
tm_borders(lwd = 0) +
tm_shape(sanf_cbd_young) +
tm_polygons(border.col = "white", lwd = 1,
col = "pct_young", style = "quantile", n = 10, alpha = 0.5) +
tm_shape(sanf_output_sf) +
tm_dots(col = "blue", size = 0.03, alpha = 0.25) +
tm_shape(sanf_cbd_bf) +
tm_borders(col = "black", alpha = 0.25)
Step 6 Create plots to compare the performance of two measures
sf objects to df objects# Cafe density from Google Places API: the outcome from Step 3
boston_den_cafe_df <- boston_den_cafe
st_geometry(boston_den_cafe_df) <- NULL
sanf_den_cafe_df <- sanf_den_cafe
st_geometry(sanf_den_cafe_df) <- NULL
# Retail job density from LEHD: the outcome from Step 4
boston_den_retail_df <- boston_den_retail_sf
st_geometry(boston_den_retail_df) <- NULL
sanf_den_retail_df <- sanf_den_retail_sf
st_geometry(sanf_den_retail_df) <- NULL
# College graduate 25-34 from ACS 2017: the outcome from Step 5
boston_cbd_young_df <- boston_cbd_young
st_geometry(boston_cbd_young_df) <- NULL
sanf_cbd_young_df <- sanf_cbd_young
st_geometry(sanf_cbd_young_df) <- NULL
data_boston <- left_join(boston_den_retail_df, boston_den_cafe_df) %>%
mutate(
den_retail = ifelse(is.na(den_retail)== TRUE, 0, den_retail),
ln_retail = log(den_retail+1),
ln_retail = scale(ln_retail),
den_cafe = ifelse(is.na(den_cafe)== TRUE, 0, den_cafe),
ln_cafe = log(den_cafe+1),
ln_cafe = scale(ln_cafe)
) %>%
left_join(boston_cbd_young_df)
# https://stackoverflow.com/questions/7549694/adding-regression-line-equation-and-r2-on-graph
lm_eqn <- function(df, y, x){
m <- lm(y ~ x, df);
eq <- substitute(italic(y) == a + b %.% italic(x)*","~~italic(r)^2~"="~r2,
list(a = format(coef(m)[1], digits = 3),
b = format(coef(m)[2], digits = 3),
r2 = format(summary(m)$r.squared, digits = 3)))
as.character(as.expression(eq));
}
a <- lm_eqn(data_boston, data_boston$pct_young, data_boston$ln_retail)
b <- lm_eqn(data_boston, data_boston$pct_young, data_boston$ln_cafe)
lm_retail_boston <- ggplot(data_boston) +
# stat_smooth_func(aes(ln_retail, pct_young), geom="text",method="lm",hjust=0,parse=TRUE) +
geom_smooth(aes(ln_retail, pct_young), method="lm",se=FALSE, color = "coral1", size = 0.1) +
geom_point(aes(ln_retail, pct_young), color = "coral4") +
coord_cartesian(xlim = c(-3, 3), ylim = c(0, 1.25)) +
geom_text(x = -1, y = 1.2, label = a, parse = TRUE)
lm_cafe_boston <- ggplot(data_boston) +
# stat_smooth_func(aes(ln_cafe, pct_young), geom="text",method="lm",hjust=0,parse=TRUE) +
geom_smooth(aes(ln_cafe, pct_young), method="lm",se=FALSE, color = "deepskyblue1", size = 0.1) +
geom_point(aes(ln_cafe, pct_young), color = "deepskyblue4") +
coord_cartesian(xlim = c(-3, 3), ylim = c(0, 1.25)) +
geom_text(x = -1, y = 1.2, label = b, parse = TRUE)
ggarrange(lm_retail_boston, lm_cafe_boston,
ncol = 2, nrow = 1)
data_sanf <- left_join(sanf_den_retail_df, sanf_den_cafe_df) %>%
mutate(
den_retail = ifelse(is.na(den_retail)== TRUE, 0, den_retail),
ln_retail = log(den_retail+1),
ln_retail = scale(ln_retail),
den_cafe = ifelse(is.na(den_cafe)== TRUE, 0, den_cafe),
ln_cafe = log(den_cafe+1),
ln_cafe = scale(ln_cafe)
) %>%
left_join(sanf_cbd_young_df)
# https://stackoverflow.com/questions/7549694/adding-regression-line-equation-and-r2-on-graph
lm_eqn <- function(df, y, x){
m <- lm(y ~ x, df);
eq <- substitute(italic(y) == a + b %.% italic(x)*","~~italic(r)^2~"="~r2,
list(a = format(coef(m)[1], digits = 3),
b = format(coef(m)[2], digits = 3),
r2 = format(summary(m)$r.squared, digits = 3)))
as.character(as.expression(eq));
}
a <- lm_eqn(data_sanf, data_sanf$pct_young, data_sanf$ln_retail)
b <- lm_eqn(data_sanf, data_sanf$pct_young, data_sanf$ln_cafe)
lm_retail_sanf <- ggplot(data_sanf) +
# stat_smooth_func(aes(ln_retail, pct_young), geom="text",method="lm",hjust=0,parse=TRUE) +
geom_smooth(aes(ln_retail, pct_young), method="lm",se=FALSE, color = "coral1", size = 0.1) +
geom_point(aes(ln_retail, pct_young), color = "coral4") +
coord_cartesian(xlim = c(-3, 3), ylim = c(0, 1.25)) +
geom_text(x = -1, y = 1.2, label = a, parse = TRUE)
lm_cafe_sanf <- ggplot(data_sanf) +
# stat_smooth_func(aes(ln_cafe, pct_young), geom="text",method="lm",hjust=0,parse=TRUE) +
geom_smooth(aes(ln_cafe, pct_young), method="lm",se=FALSE, color = "deepskyblue1", size = 0.1) +
geom_point(aes(ln_cafe, pct_young), color = "deepskyblue4") +
coord_cartesian(xlim = c(-3, 3), ylim = c(0, 1.25)) +
geom_text(x = -1, y = 1.2, label = b, parse = TRUE)
ggarrange(lm_retail_sanf, lm_cafe_sanf,
ncol = 2, nrow = 1)