tidycensus::census_api_key(Sys.getenv("CENSUS_API"))
## To install your API key for use in future sessions, run this function with `install = TRUE`.
#2. Select a U.S. city
# Get block groups for MA and then Brookline
bg <- suppressMessages(
tidycensus::get_acs(geography = "block group",
state = "MA",
county = c("Norfolk"),
year = 2023,
variables = c(hhincome = 'B19013_001'),
survey = "acs5",
geometry = TRUE,
output = "wide")
)
# City of Brookline boundary
brookline <- tigris::places('MA') %>% filter(NAME == 'Brookline')
## Retrieving data for the year 2022
#3. Divide the city into small areas
# Get BGs intersecting with the City of Brookline boundary
bg_brookline <- bg[brookline,]
#view the data
bg_brookline %>% head() %>% knitr::kable()
GEOID | NAME | hhincomeE | hhincomeM | geometry | |
---|---|---|---|---|---|
19 | 250214002021 | Block Group 1; Census Tract 4002.02; Norfolk County; Massachusetts | NA | NA | MULTIPOLYGON (((-71.12029 4… |
21 | 250214001001 | Block Group 1; Census Tract 4001; Norfolk County; Massachusetts | 101406 | 58911 | MULTIPOLYGON (((-71.11718 4… |
22 | 250214004022 | Block Group 2; Census Tract 4004.02; Norfolk County; Massachusetts | 250001 | NA | MULTIPOLYGON (((-71.13782 4… |
49 | 250214008001 | Block Group 1; Census Tract 4008; Norfolk County; Massachusetts | 117100 | 30414 | MULTIPOLYGON (((-71.12135 4… |
51 | 250214004011 | Block Group 1; Census Tract 4004.01; Norfolk County; Massachusetts | 104398 | 32122 | MULTIPOLYGON (((-71.13515 4… |
61 | 250214012013 | Block Group 3; Census Tract 4012.01; Norfolk County; Massachusetts | 250001 | NA | MULTIPOLYGON (((-71.17372 4… |
bg_brookline <- bg_brookline %>%
select(GEOID,
hhincome = hhincomeE) # New name = old name
tmap_mode("view")
## tmap mode set to interactive viewing
tm_shape(bg_brookline) + tm_borders(lwd = 2) +
tm_shape(brookline) + tm_polygons(col = 'yellow', alpha = 0.4)
#4. Verify coverage
# Function: Get XY coordinates and radius
getXYRadius <- function(polygon, gcs_id, pcs_id){
# Transform the CRS to PCS. WHY?
if (st_crs(polygon) != st_crs(pcs_id)){
polygon <- polygon %>% st_transform(pcs_id)
}
# Get bounding box of a given polygon
bb <- st_bbox(polygon)
# Get XY coordinates of any one corner of the bounding box.
bb_corner <- st_point(c(bb[1], bb[2])) %>% st_sfc(crs = pcs_id)
# Get centroid of the bb
bb_center <- bb %>% st_as_sfc() %>% st_centroid()
# Get the distance between bb_center and bb_corner
r <- st_distance(bb_corner, bb_center)
# Convert the CRS of centroid to GCS.
bb_center <- bb_center %>% st_transform(gcs_id)
# Get longitude and latitude
xy <- bb_center %>% st_coordinates() %>% as.vector()
lon_lat_radius <- data.frame(x = xy[1],
y = xy[2],
r = r)
return(lon_lat_radius)
}
# Define EPSG codes for GCS (WGS 84) and PCS (WGS 84 / UTM zone 16N)
gcs_id <- 4326
pcs_id <- 32616
# Pre-allocate a data frame. Results will fill this data frame
bg_brookline_xyr <- data.frame(x = numeric(nrow(bg_brookline)),
y = NA,
r = NA)
# Do a for-loop
for (i in 1:nrow(bg_brookline)){
bg_brookline_xyr[i,] <- bg_brookline[i, ] %>%
getXYRadius(gcs_id = gcs_id,
pcs_id = pcs_id)
}
tmap_mode('view')
## tmap mode set to interactive viewing
bg_brookline_xyr %>%
# Convert the data frame into an sf object
st_as_sf(coords = c("x", "y"), crs = st_crs(bg_brookline)) %>%
# Draw a buffer centered at the centroid of BG polygons.
# The buffer distance is the radius we just calculated
st_buffer(dist = .$r) %>%
# Display this buffer in red
tm_shape(.) + tm_polygons(alpha = 0.1, col = 'red') +
# Display the original polygon in blue
tm_shape(bg_brookline) + tm_borders(col= 'blue')
nearbySearch <- function(lat, lon, radius, types_vec, fieldmask_vec, google_api_key){
endpoint <- "https://places.googleapis.com/v1/places:searchNearby"
body <- list(
includedTypes = as.list(types_vec),
locationRestriction = list(
circle = list(
center = list(latitude = lat, longitude = lon),
radius = radius
)
),
rankPreference = "DISTANCE"
)
resp <- POST(
endpoint,
add_headers(
"Content-Type" = "application/json",
"X-Goog-Api-Key" = google_api_key,
"X-Goog-FieldMask" = paste(fieldmask_vec, collapse = ",")),
body = body,
encode = "json"
)
data <- content(resp, as="text") %>%
jsonlite::fromJSON(flatten = T) %>%
as.data.frame()
if (nrow(data) == 20){
print("WARNING: The response has 20 rows! Consider using a smaller spatial unit.")
}
return(data)
}
#5. Collect POI data using the Nearby Search in the Google Places API.
# pre-allocate list
data_list <- vector("list", nrow(bg_brookline_xyr))
for (i in seq_len(nrow(bg_brookline_xyr))) {
data_list[[i]] <- nearbySearch(
lat = bg_brookline_xyr$y[i],
lon = bg_brookline_xyr$x[i],
radius = bg_brookline_xyr$r[i],
types_vec = c("school", "library"),
fieldmask_vec = c("places.id",
"places.displayName",
"places.formattedAddress",
"places.location",
"places.types",
"places.priceLevel",
"places.rating",
"places.userRatingCount"),
google_api_key = Sys.getenv("GOOGLE_API")
)
Sys.sleep(5)
}
## [1] "WARNING: The response has 20 rows! Consider using a smaller spatial unit."
## [1] "WARNING: The response has 20 rows! Consider using a smaller spatial unit."
## [1] "WARNING: The response has 20 rows! Consider using a smaller spatial unit."
# Combine all data frames
data_all <- dplyr::bind_rows(data_list)
#save dataframe
saveRDS(data_all, here('google_poi_data.rds'))
#6. Map the collected POIs
# Convert the data to an sf object using XY coordinates
data_all_sf <- data_all %>%
rename(x = places.location.longitude, y = places.location.latitude) %>%
filter(!is.na(x) & !is.na(y)) %>%
st_as_sf(coords = c("x", "y"), crs = 4326)
# Map
tm_shape(data_all_sf) +
tm_dots(col = "places.rating",
size = "places.userRatingCount",
palette = "magma",
popup.vars = c("Name" = "places.displayName.text",
"Rating" = "places.rating",
"Rating Count" = "places.userRatingCount")) +
tm_shape(brookline) +
tm_borders()
## Legend for symbol sizes not available in view mode.
What city did you choose?
Brookline, MA
Which two place types did you select?
School and Library
How many rows does your dataset contain?
349
Upon visual inspection, do you notice any spatial patterns in
how POIs are distributed across the city?
Yes. I observe clusters in some neighborhoods. From my experience living
in Brookline, more densely populated neighborhoods tend to have a higher
rating count, and POIs are clustered in marketplace areas within close
proximity to each other.
(Optional) Did you observe any other interesting
findings?
N/A