Install and load all required packages

# Register the Census API key for this session; the key is read from the
# `census_api` environment variable so it never appears in the source.
tidycensus::census_api_key(Sys.getenv("census_api"))

# yelpr is installed from GitHub. Install it only when it is missing so that
# re-running the script does not contact GitHub every time.
if (!requireNamespace("yelpr", quietly = TRUE)) {
  devtools::install_github("OmaymaS/yelpr")
}
library(tigris)
library(tidycensus)
library(sf)
library(tmap)
library(jsonlite)
library(tidyverse)
library(httr)
library(reshape2)
library(here)
library(yelpr)
library(knitr)

Get the Census Tract boundaries using the Census API

# Download 2019 ACS 5-year, tract-level data for Boulder County, Colorado.
# With output = "wide" every named variable below shows up as a pair of
# columns suffixed E and M (e.g. hhincomeE / hhincomeM).
tract <- suppressMessages(
  get_acs(
    geography = "tract",
    state = "CO",
    county = "Boulder",
    year = 2019,
    survey = "acs5",   # American Community Survey 5-year estimate
    output = "wide",   # wide vs. long
    geometry = TRUE,   # returns sf objects
    variables = c(
      hhincome = "B19019_001",
      race.tot = "B02001_001",
      race.white = "B02001_002",
      race.black = "B02001_003"
    )
  )
)

# Preview the first rows as a formatted table.
knitr::kable(head(tract))
GEOID NAME hhincomeE hhincomeM race.totE race.totM race.whiteE race.whiteM race.blackE race.blackM geometry
08013012607 Census Tract 126.07, Boulder County, Colorado 23699 4974 5926 655 4873 545 124 99 MULTIPOLYGON (((-105.2586 4…
08013012104 Census Tract 121.04, Boulder County, Colorado 121658 10791 2557 205 1980 208 0 12 MULTIPOLYGON (((-105.297 40…
08013013505 Census Tract 135.05, Boulder County, Colorado 47004 4698 4269 438 3863 446 33 35 MULTIPOLYGON (((-105.1024 4…
08013013308 Census Tract 133.08, Boulder County, Colorado 52031 6746 3492 235 3227 266 40 50 MULTIPOLYGON (((-105.1168 4…
08013013305 Census Tract 133.05, Boulder County, Colorado 67835 5255 5425 298 4766 396 58 64 MULTIPOLYGON (((-105.1311 4…
08013013602 Census Tract 136.02, Boulder County, Colorado 101071 17804 867 160 832 154 2 5 MULTIPOLYGON (((-105.6776 4…

Map the Tract boundaries

# Switch tmap to its "view" mode before drawing anything.
tmap_mode("view")

# Base layer: tract outlines in blue. The object is kept in `map` because a
# later chunk adds the business layer on top of it.
map <- tm_shape(tract) +
  tm_borders(col = "blue")

map

Function to get tract-wise radius

get_r <- function(poly, epsg_id, buffer_factor = 1.2) {
  #---------------------
  # Takes:
  #   poly          - a single POLYGON or LINESTRING geometry
  #   epsg_id       - EPSG code of the CRS the coordinates of `poly` are in
  #   buffer_factor - multiplier applied to the centre-to-corner distance
  #                   (default 1.2, the value this script has always used)
  # Outputs: a one-row sf object holding the centre point of the bounding box
  #   of `poly`, with a `radius` column equal to the distance between that
  #   centre and one corner of the bounding box, times `buffer_factor`.
  #   The distance is whatever st_distance() returns for `epsg_id`
  #   (presumably metres for EPSG:4326 - confirm against the sf docs).
  #---------------------

  # Bounding box of the geometry: xmin, ymin, xmax, ymax
  bb <- st_bbox(poly)

  # One corner of the bounding box (xmin, ymin)
  bb_corner <- st_point(c(bb[["xmin"]], bb[["ymin"]])) %>%
    st_sfc(crs = epsg_id)

  # Centre of the bounding box: midpoint of each axis
  bb_center_x <- (bb[["xmax"]] + bb[["xmin"]]) / 2
  bb_center_y <- (bb[["ymax"]] + bb[["ymin"]]) / 2
  bb_center <- st_point(c(bb_center_x, bb_center_y)) %>%
    st_sfc(crs = epsg_id) %>%
    st_sf()

  # Distance between the corner and the centre of the bounding box
  r <- st_distance(bb_corner, bb_center)

  # Scale the radius up so the search circle is a bit larger than the
  # Census Tract. See the Yelp explanation of their radius parameter to see
  # why we do this.
  bb_center$radius <- r * buffer_factor

  bb_center
}

Apply the above function to each Census Tract

# Run get_r() once per Census Tract with lapply(): the tract geometries are
# re-projected to `epsg_id` first, and each geometry yields a one-row sf
# object (bounding-box centre + search radius).
epsg_id <- 4326
r4all_apply <- lapply(
  st_transform(st_geometry(tract), crs = epsg_id),
  get_r,
  epsg_id = epsg_id
)

# Stack the per-tract results into a single object.
r4all_apply <- bind_rows(r4all_apply)

# Expose the centre coordinates as plain x / y columns, which is the form
# the Yelp request function reads them in.
ready_4_yelp <- mutate(
  r4all_apply,
  x = st_coordinates(r4all_apply)[, 1],
  y = st_coordinates(r4all_apply)[, 2]
)

Visualize the buffers

# Red, semi-transparent circles: each one is centred on the bounding-box
# centre of a tract and uses the radius computed for that tract by get_r().
# Blue outlines: the original tract polygons, drawn on top for comparison.
tm_shape(st_buffer(ready_4_yelp, dist = ready_4_yelp$radius)) +
  tm_polygons(alpha = 0.3, col = "red") +
  tm_shape(tract) +
  tm_borders(col = "blue")

Function to get the yelp data through the yelp API

# FUNCTION
get_yelp <- function(tract, category) {
  # ----------------------------------
  # Takes:
  #   tract    - one row of tract information with columns `x` (longitude),
  #              `y` (latitude) and `radius`
  #   category - Yelp category name (str), e.g. "restaurants" or "food"
  # Outputs:
  #   - normally: a single data frame with the businesses of every page
  #   - when the search reports >= 1000 businesses: the partially filled
  #     list `out` (first page only), whose first element is named with the
  #     number of pages that would have been required
  # Every request, including the first one, searches for `category`.
  # ----------------------------------

  # Pause before hitting the API for this tract.
  Sys.sleep(1)

  page_size <- 50
  api_key <- Sys.getenv("yelp_api")
  search_radius <- round(tract$radius)
  # NOTE(review): Yelp documents an upper bound on `radius`; confirm that
  # large tracts do not exceed it.

  # Request one page of results (page numbers start at 1).
  fetch_page <- function(page) {
    business_search(api_key = api_key,
                    categories = category,
                    latitude = tract$y,
                    longitude = tract$x,
                    offset = (page - 1) * page_size, # = 0 when page = 1
                    radius = search_radius,
                    limit = page_size)
  }

  n <- 1

  # First request --------------------------------------------------------------
  resp <- fetch_page(n)

  # Without `total` the page count cannot be computed; fail with a clear
  # message instead of an obscure error further down.
  if (is.null(resp$total)) {
    stop("Yelp response has no `total` field; the request probably failed.",
         call. = FALSE)
  }
  total <- as.numeric(resp$total)

  # Calculate how many requests are needed in total
  required_n <- ceiling(total / page_size)

  # out is where the results will be appended to.
  out <- vector("list", required_n)

  # Store the business information to nth slot in out
  out[[n]] <- resp$businesses

  # Name the first element with required_n, so that when there are
  # 1000 or more businesses we still know how many pages there were.
  names(out)[n] <- required_n

  # Bail out early if there are 1000 or more businesses: the Census Tract
  # needs to be broken down into something smaller before paging makes sense.
  if (total >= 1000) {
    # glue formats string by inserting {n} with what's currently stored in
    # object n. NOTE(review): n is the page counter and is always 1 here,
    # not the row index of the tract.
    print(glue::glue("{n}th row has >= 1000 businesses."))
    return(out)
  }

  # Now we know required_n -----------------------------------------------------
  # Fetch the remaining pages, if any.
  n <- n + 1
  while (n <= required_n) {
    out[[n]] <- fetch_page(n)$businesses
    n <- n + 1
  }

  # Merge all elements in the list into a single data frame
  out %>% bind_rows()
}

Apply the get_yelp function to retrieve business data for each category (restaurants and food)

# Create a copy of the ready_4_yelp variable to use for the 'food' category
ready_4_yelp_food <- ready_4_yelp

# Prepare one collector per category, with one slot per Census Tract
yelp_all_list <- vector("list", length = nrow(ready_4_yelp))
yelp_all_list_food <- vector("list", length = nrow(ready_4_yelp))

# Loop through all Census Tracts. seq_len() yields an empty sequence when
# there are no rows, whereas 1:nrow() would iterate over c(1, 0).
for (row in seq_len(nrow(ready_4_yelp))) {
  yelp_all_list[[row]] <- suppressMessages(
    get_yelp(ready_4_yelp[row, ], "restaurants")
  )
  yelp_all_list_food[[row]] <- suppressMessages(
    get_yelp(ready_4_yelp_food[row, ], "food")
  )
}

Collapse the lists into data frames and add a column that describes the business category

# Collapse each per-tract list into one tibble and label its category
yelp_restaurants <- yelp_all_list %>% bind_rows() %>% as_tibble()
yelp_restaurants$business_category <- 'Restaurants'
yelp_food <- yelp_all_list_food %>% bind_rows() %>% as_tibble()
yelp_food$business_category <- 'Food'

# Combine both categories. bind_rows() matches columns by name and fills
# columns missing from one side with NA, whereas rbind() fails when the two
# result sets do not have exactly the same columns.
combined_yelp <- bind_rows(yelp_restaurants, yelp_food)

Remove duplicates and count the number of businesses in each category

# Keep only the first row seen for each business name, per category.
# NOTE(review): de-duplicating on `name` presumably also merges distinct
# locations that share a name (e.g. chains); confirm whether a unique
# business identifier would be the better key.
restaurants_no_duplicates <- yelp_restaurants %>%
  filter(!duplicated(.$name))
food_no_duplicates <- yelp_food %>%
  filter(!duplicated(.$name))

# Number of remaining businesses in each category
restaurant_count <- nrow(restaurants_no_duplicates)
food_count <- nrow(food_no_duplicates)
print(paste("The number of businesses under the category 'restaurant' in Boulder is:", restaurant_count))
## [1] "The number of businesses under the category 'restaurant' in Boulder is: 859"
print(paste("The number of businesses under the category 'food' in Boulder is:", food_count))
## [1] "The number of businesses under the category 'food' in Boulder is: 756"

# Same de-duplication on the combined table, so a business listed under both
# categories is counted once.
yelp_no_duplicates <- combined_yelp %>%
  filter(!duplicated(.$name))
all_count <- nrow(yelp_no_duplicates)
print(paste("The number of businesses under both categories in Boulder is:", all_count))
## [1] "The number of businesses under both categories in Boulder is: 1363"

Extract coordinates for each business and map it out

# Turn the de-duplicated businesses into point features: pull longitude and
# latitude out of the nested `coordinates` column, drop rows missing either
# value, and build an sf object in the CRS given by `epsg_id`.
yelp_sf <- yelp_no_duplicates %>%
  mutate(
    x = coordinates$longitude,
    y = coordinates$latitude
  ) %>%
  filter(!is.na(x), !is.na(y)) %>%
  st_as_sf(coords = c("x", "y"), crs = epsg_id)

# Add the businesses as bubbles on top of the tract map, coloured by rating;
# the popup lists name, review count and rating.
new_map <- map +
  tm_shape(yelp_sf) +
  tm_bubbles(
    col = "rating",
    id = "name",
    popup.vars = c("name", "review_count", "rating")
  )

new_map

Questions

What’s the county and state of your choice?

Boulder, CO.

How many businesses are there in total?

There are 1363 businesses in total.

How many businesses are there for each business category?

There are 859 restaurants and 756 food businesses after removing duplicates.

Upon visual inspection, can you see any noticeable spatial patterns to the way they are distributed across the county (e.g., clustering of businesses at some parts of the county)?

There is a large cluster of businesses around downtown Boulder (Pearl Street), which was expected. There also seems to be a good number of businesses in Longmont, an area that has seen rapid growth in recent years.

(Optional) Are there any other interesting findings?

It was surprising to see that there are a few businesses up in the mountains.