library(tidycensus)
library(sf)
library(tmap)
library(jsonlite)
library(tidyverse)
library(httr)
library(jsonlite)
library(reshape2)
library(here)
library(knitr)
library(tigris)
library(units)
Choose two place types for the includedTypes
parameter in the Google Places Nearby Search API. Refer to the API
documentation for options.
Response: I chose
asian_grocery_store
and hiking_area
.
tigris
package to obtain an
sf
object of the city boundary.# Get all places in Massachusetts
ma_places <- places(state = "MA", year = 2021) # returns sf object
# Filter for Boston
boston_boundary <- ma_places %>%
filter(NAME == "Boston")
# Map
plot(st_geometry(boston_boundary))
longitude
, latitude
, and radius
)
for each area:
tidycensus::census_api_key(Sys.getenv("CENSUS_API_KEY"), overwrite=FALSE)
## To install your API key for use in future sessions, run this function with `install = TRUE`.
# Get block groups in Suffolk County, MA
bg <- suppressMessages(
tidycensus::get_acs(
geography = "block group",
state = "MA",
county = "Suffolk",
variables = c(hhincome = "B19013_001"),
year = 2021,
survey = "acs5",
geometry = TRUE,
output = "wide"
)
)
# Get Boston city boundary
boston <- tigris::places("MA") %>% filter(NAME == "Boston")
## Retrieving data for the year 2024
# Get block groups intersecting with Boston
bg_boston <- bg[boston, ]
In this step, I found that Boston has too many block groups, which makes the analysis heavy and increases API calls. To simplify, I decided to reduce the coverage area. Since my travel plan is centered around Logan Airport (and downtown area), I used the block group containing Logan Airport as the center and created a 1-mile circle to select intersecting block groups.
# Define EPSG codes for GCS (WGS 84) and PCS (WGS 84 / UTM zone 19N)
gcs_id <- 4326
pcs_id <- 32619 # for Boston
# radius in meters (1 miles)
radius_m <- 1609
logan_id <- "250259813001" # the logan airport bg ID
# BG
logan <- bg_boston %>% filter(GEOID == logan_id)
# buffer 1 miles around the logan BG
logan_m <- st_transform(logan, pcs_id)
buffer_m <- st_buffer(logan_m, dist = radius_m)
# intersect the whole BG layer with the buffer
bg_m <- st_transform(bg_boston, pcs_id)
sel <- st_intersects(bg_m, buffer_m, sparse = FALSE)
bg_logan_1mi <- bg_m[sel, ]
nrow(bg_logan_1mi)
## [1] 61
# Map
tmap_mode("view")
## ℹ tmap mode set to "view".
tm_shape(bg_logan_1mi) + tm_borders(lwd = 2) +
tm_shape(boston) + tm_polygons(col = 'blue', fill_alpha = 0.4)
I believe the coverage area is now relatively small while still encompassing the locations (e.g., Boston Harbor) that interest me.
# Function: Get XY coordinates and radius
getXYRadius <- function(polygon, gcs_id, pcs_id){
if (st_crs(polygon) != st_crs(pcs_id)){
polygon <- polygon %>% st_transform(pcs_id)
}
bb <- st_bbox(polygon)
bb_corner <- st_point(c(bb[1], bb[2])) %>% st_sfc(crs = pcs_id)
bb_center <- bb %>% st_as_sfc() %>% st_centroid()
r <- st_distance(bb_corner, bb_center)
bb_center <- bb_center %>% st_transform(gcs_id)
xy <- bb_center %>% st_coordinates() %>% as.vector()
lon_lat_radius <- data.frame(x = xy[1],
y = xy[2],
r = r)
return(lon_lat_radius)
}
# Pre-allocate a data frame. Results will fill this data frame
bg_logan_1mi_xyr <- data.frame(x = numeric(nrow(bg_logan_1mi)),
y = NA,
r = NA)
# Do a for-loop
for (i in 1:nrow(bg_logan_1mi)){
bg_logan_1mi_xyr[i,] <- bg_logan_1mi[i, ] %>%
getXYRadius(gcs_id = gcs_id,
pcs_id = pcs_id)
}
locationRestriction
parameters on a map:
sf
object using longitude
,
latitude
, and sf::st_as_sf()
.radius
and
sf::st_buffer()
.tmap
package.tmap_mode('view')
## ℹ tmap mode set to "view".
points_ll <- st_as_sf(bg_logan_1mi_xyr, coords = c("x", "y"), crs=4326)
points_m <- st_transform(points_ll, pcs_id)
points_m <- st_transform(points_ll, pcs_id)
buf_m <- st_buffer(points_m, dist = bg_logan_1mi_xyr$r)
buf_ll <- st_transform(buf_m, gcs_id)
# Map
tm_shape(buf_ll) + tm_polygons(fill = "red", fill_alpha = 0.3) +
tm_shape(bg_logan_1mi) + tm_borders(col = "blue") +
tm_shape(boston_boundary) + tm_borders(lwd = 2)
includedTypes
parameter.FieldMask
, include these 8 fields:
places.id
; places.displayName
;
places.formattedAddress
; places.location
;
places.types
; places.priceLevel
;
places.rating
; places.userRatingCount
nearbySearch <- function(lat, lon, radius, types_vec, fieldmask_vec, google_api_key){
endpoint <- "https://places.googleapis.com/v1/places:searchNearby"
body <- list(
includedTypes = as.list(types_vec),
locationRestriction = list(
circle = list(
center = list(latitude = lat, longitude = lon),
radius = radius
)
),
rankPreference = "DISTANCE"
)
resp <- POST(
endpoint,
add_headers(
"Content-Type" = "application/json",
"X-Goog-Api-Key" = google_api_key,
"X-Goog-FieldMask" = paste(fieldmask_vec, collapse = ",")),
body = body,
encode = "json"
)
data <- content(resp, as="text") %>%
jsonlite::fromJSON(flatten = T) %>%
as.data.frame()
if (nrow(data) == 20){
print("WARNING: The response has 20 rows! Consider using a smaller spatial unit.")
}
return(data)
}
# pre-allocate list
data_list <- vector("list", nrow(bg_logan_1mi_xyr))
for (i in seq_len(nrow(bg_logan_1mi_xyr))) {
data_list[[i]] <- nearbySearch(
lat = bg_logan_1mi_xyr$y[i],
lon = bg_logan_1mi_xyr$x[i],
radius = bg_logan_1mi_xyr$r[i],
types_vec = c("asian_grocery_store"),
fieldmask_vec = c("places.id",
"places.displayName",
"places.formattedAddress",
"places.location",
"places.types",
"places.primaryType",
"places.businessStatus",
"places.priceLevel",
"places.priceRange",
"places.rating",
"places.userRatingCount",
"places.reviews",
"places.reviewSummary"),
google_api_key = Sys.getenv("GOOGLE_API")
)
Sys.sleep(1)
}
# Combine all data frames
asian_grocery_store <- dplyr::bind_rows(data_list)
saveRDS(asian_grocery_store, here('asian_grocery_store.rds'))
# pre-allocate list
data_list <- vector("list", nrow(bg_logan_1mi_xyr))
for (i in seq_len(nrow(bg_logan_1mi_xyr))) {
data_list[[i]] <- nearbySearch(
lat = bg_logan_1mi_xyr$y[i],
lon = bg_logan_1mi_xyr$x[i],
radius = bg_logan_1mi_xyr$r[i],
types_vec = c("hiking_area"),
fieldmask_vec = c("places.id",
"places.displayName",
"places.formattedAddress",
"places.location",
"places.types",
"places.primaryType",
"places.businessStatus",
"places.priceLevel",
"places.priceRange",
"places.rating",
"places.userRatingCount",
"places.reviews",
"places.reviewSummary"),
google_api_key = Sys.getenv("GOOGLE_API")
)
Sys.sleep(1)
}
# Combine all data frames
hiking_area <- dplyr::bind_rows(data_list)
saveRDS(hiking_area, here('hiking_area.rds'))
sf
object using
sf::st_as_sf()
.# asian_grocery_store <- readRDS('asian_grocery_store.rds')
# hiking_area <- readRDS('hiking_area.rds')
# Asian grocery stores
asian_sf <- asian_grocery_store %>%
rename(x = places.location.longitude, y = places.location.latitude) %>%
filter(!is.na(x) & !is.na(y)) %>%
mutate(type = "Asian Grocery Store") %>%
st_as_sf(coords = c("x", "y"), crs = 4326)
# Hiking areas
hiking_sf <- hiking_area %>%
rename(x = places.location.longitude, y = places.location.latitude) %>%
filter(!is.na(x) & !is.na(y)) %>%
mutate(type = "Hiking Area") %>%
st_as_sf(coords = c("x", "y"), crs = 4326)
# Combine
pois_sf <- bind_rows(asian_sf, hiking_sf)
# Map
tmap_mode("view")
tm_shape(boston_boundary) + tm_borders(lwd = 2) +
tm_shape(pois_sf) +
tm_symbols(
shape = "type",
col = "places.rating",
size = "places.userRatingCount",
fill.scale = tm_scale_continuous(values = "viridis", name="Rating"),
palette = "viridis",
fill_alpha = 0.9,
popup.vars = c(
"Name" = "places.displayName.text",
"Rating" = "places.rating",
"Count" = "places.userRatingCount",
"Type" = "type"
)) +
tm_shape(bg_logan_1mi) +
tm_borders() +
tm_layout(legend.outside = TRUE)
Answer the following questions at the end of your script:
What city did you choose?
Response: I selected Boston, but constrained the analysis to block groups within one mile of the block group containing Logan Airport in order to reduce API calls.
Which two place types did you select?
Response: Asian grocery stores and hiking areas.
How many rows does your dataset contain?
Response: The dataset contains 9 rows for Asian grocery stores and 31 rows for hiking areas.
Upon visual inspection, do you notice any spatial patterns in how POIs are distributed across the city?
Response: Several Asian grocery stores are concentrated in Chinatown. Hiking areas are relatively limited and occur mainly in large parks, reflecting Boston’s highly urbanized character.
(Optional) Did you observe any other interesting findings?
Response: I found that some block groups in Boston are very small compared with the block group containing Logan Airport. This difference surprised me because it contrasts with the communities I am familiar with in China. Chinese communities or residential blocks tend to have relatively similar geographic sizes, since they are administrative units designed for governance and service delivery, and therefore remain uniform in area regardless of population size.