Census & Yelp API

Yanfu Bai

2024-09-12

# Register the Census API key for this session. The key is read from the
# `census_api` environment variable so it never appears in the script itself.
tidycensus::census_api_key(key = Sys.getenv("census_api"))
## To install your API key for use in future sessions, run this function with `install = TRUE`.

The city that I chose is Champaign, Illinois.

## Getting Census Tract Boundary for Champaign, IL

# Download the household income variable (B19019_001) for every census tract
# in Champaign County, IL from the 2021 5-year ACS, with geometries attached.
# "wide" output yields one row per tract with an `hhincomeE` estimate column.
tract <- suppressMessages(
  get_acs(
    geography = "tract",
    variables = c(hhincome = "B19019_001"),
    state = "IL",
    county = "Champaign",
    year = 2021,
    survey = "acs5",
    output = "wide",
    geometry = TRUE
  )
)

# Fetch all Illinois places and keep only the one named "Champaign".
champaign <- tigris::places(state = "IL") %>%
  filter(NAME == "Champaign")
## Retrieving data for the year 2022
# Keep the tracts selected by the Champaign place polygon (sf spatial subset),
# reduce them to the tract ID and income estimate, and drop one tract that is
# too large.
tract_champaign <- tract[champaign, ] %>%
  select(GEOID, hhincome = hhincomeE) %>%
  filter(GEOID != "17019010604")

# Interactive map: selected tracts (brown) under the city boundary (cyan).
tmap_mode("view")
## tmap mode set to interactive viewing
tm_shape(tract_champaign) +
  tm_polygons(col = "brown", alpha = 0.1) +
  tm_shape(champaign) +
  tm_polygons(col = "cyan", alpha = 0.4)

Function to get tract-wise radius:

get_r <- function(poly, epsg_id){
  # ----------------------------------
  # Takes one polygon geometry and the EPSG code its coordinates are in.
  # Returns a one-row sf object holding the centre of the polygon's bounding
  # box, with a `radius` column equal to 1.1 times the distance from the
  # bounding box's lower-left corner to that centre.
  box <- st_bbox(poly)

  # Lower-left corner of the bounding box (xmin, ymin)
  corner <- st_point(c(box[1], box[2])) %>% st_sfc(crs = epsg_id)

  # Midpoint of the bounding box on each axis
  mid_x <- (box[3] + box[1]) / 2
  mid_y <- (box[4] + box[2]) / 2
  # Piping into st_sf() keeps the geometry column named `geometry`
  center <- st_point(c(mid_x, mid_y)) %>%
    st_sfc(crs = epsg_id) %>%
    st_sf()

  # Corner-to-centre distance, inflated by 10%
  center$radius <- st_distance(corner, center) * 1.1
  center
}
# Coordinate reference system used for the Yelp search centres
epsg_id <- 4326

# Apply get_r() to every tract geometry and stack the one-row results
r4all_apply <- tract_champaign %>%
  st_geometry() %>%
  st_transform(crs = epsg_id) %>%
  lapply(get_r, epsg_id = epsg_id) %>%
  bind_rows()

# Expose the centre coordinates as plain x / y columns
ready_4_yelp <- r4all_apply %>%
  mutate(
    x = st_coordinates(.)[, 1],
    y = st_coordinates(.)[, 2]
  )
tmap_mode("view")
## tmap mode set to interactive viewing
# Buffer each search centre by its computed radius and draw the buffers in
# red, with the original tract borders on top in blue.
ready_4_yelp %>%
  st_buffer(dist = .$radius) %>%
  tm_shape() +
  tm_polygons(alpha = 0.2, col = "red") +
  tm_shape(tract_champaign) +
  tm_borders(col = "blue")

We can now write a function that queries Yelp using `business_search()` from the yelpr package.

# FUNCTION
get_yelp <- function(tract, category){
  # ----------------------------------
  # Searches Yelp around one tract centre and collects every page of results.
  #
  # Args:
  #   tract:    one row of tract information (1,) providing `x` (sent as
  #             longitude), `y` (sent as latitude) and `radius` (rounded and
  #             sent as the search radius).
  #   category: search string (str), sent as `term` on EVERY request so that
  #             all pages belong to the same query as the reported total.
  #
  # Returns:
  #   A data.frame of all businesses found (result pages row-bound together).
  #   If Yelp reports >= 1000 matches, pagination is skipped and a list holding
  #   only the first page is returned; that element is named after the number
  #   of pages that would have been required.
  #
  # Stops with an error if the response carries no `total` field.
  page_size <- 50
  api_key <- Sys.getenv("yelp_api")

  # One request for a given 1-based page number; shared by the first request
  # and the pagination loop so both use identical search parameters.
  fetch_page <- function(page) {
    business_search(api_key = api_key,
                    term = category,
                    latitude = tract$y,
                    longitude = tract$x,
                    offset = (page - 1) * page_size, # = 0 when page = 1
                    radius = round(tract$radius),
                    limit = page_size)
  }

  Sys.sleep(1)

  # First request --------------------------------------------------------------
  resp <- fetch_page(1)

  # Without `total` the number of pages cannot be computed; fail clearly
  # instead of with a cryptic error from vector().
  if (is.null(resp$total)) {
    stop("Yelp response has no `total` field; the request may have failed.",
         call. = FALSE)
  }

  # Calculate how many requests are needed in total
  required_n <- ceiling(resp$total / page_size)

  # out is where the results will be appended to.
  out <- vector("list", required_n)
  out[[1]] <- resp$businesses

  # Name the first element after required_n so that, if there are more than
  # 1000 businesses, the caller can see how many pages there would have been.
  names(out)[1] <- required_n

  # Too many results: return early because the Census Tract needs to be
  # broken down into something smaller.
  if (resp$total >= 1000) {
    print(glue::glue(
      "Search for '{category}' at ({tract$y}, {tract$x}) has >= 1000 businesses."
    ))
    return(out)
  }

  # Remaining pages ------------------------------------------------------------
  # seq_len(required_n)[-1] is 2..required_n, and empty when required_n <= 1
  for (page in seq_len(required_n)[-1]) {
    out[[page]] <- fetch_page(page)$businesses
  }

  # Merge all elements in the list into a single data frame
  bind_rows(out)
}

We can loop through all census tracts to find two categories of businesses: gas stations and auto repair shops.

# Prepare 2 collectors, one slot per search centre
yelp_all_list1 <- vector("list", nrow(ready_4_yelp))
yelp_all_list2 <- vector("list", nrow(ready_4_yelp))

# Looping through all Census Tracts (gas stations).
# seq_len() instead of 1:nrow() so the loop body never runs when there are
# zero rows (1:0 would iterate over 1 and 0).
for (row in seq_len(nrow(ready_4_yelp))){
  yelp_all_list1[[row]] <- suppressMessages(get_yelp(ready_4_yelp[row,], "gas stations"))
  print(paste0("Current row: ", row))
}
## [1] "Current row: 1"
## [1] "Current row: 2"
## [1] "Current row: 3"
## [1] "Current row: 4"
## [1] "Current row: 5"
## [1] "Current row: 6"
## [1] "Current row: 7"
## [1] "Current row: 8"
## [1] "Current row: 9"
## [1] "Current row: 10"
## [1] "Current row: 11"
## [1] "Current row: 12"
## [1] "Current row: 13"
## [1] "Current row: 14"
## [1] "Current row: 15"
## [1] "Current row: 16"
## [1] "Current row: 17"
## [1] "Current row: 18"
## [1] "Current row: 19"
## [1] "Current row: 20"
## [1] "Current row: 21"
## [1] "Current row: 22"
## [1] "Current row: 23"
## [1] "Current row: 24"
## [1] "Current row: 25"
## [1] "Current row: 26"
# Looping through all Census Tracts (auto repair).
# seq_len() instead of 1:nrow() so the loop body never runs when there are
# zero rows (1:0 would iterate over 1 and 0).
for (row in seq_len(nrow(ready_4_yelp))){
  yelp_all_list2[[row]] <- suppressMessages(get_yelp(ready_4_yelp[row,], "auto repair"))
  print(paste0("Current row: ", row))
}
## [1] "Current row: 1"
## [1] "Current row: 2"
## [1] "Current row: 3"
## [1] "Current row: 4"
## [1] "Current row: 5"
## [1] "Current row: 6"
## [1] "Current row: 7"
## [1] "Current row: 8"
## [1] "Current row: 9"
## [1] "Current row: 10"
## [1] "Current row: 11"
## [1] "Current row: 12"
## [1] "Current row: 13"
## [1] "Current row: 14"
## [1] "Current row: 15"
## [1] "Current row: 16"
## [1] "Current row: 17"
## [1] "Current row: 18"
## [1] "Current row: 19"
## [1] "Current row: 20"
## [1] "Current row: 21"
## [1] "Current row: 22"
## [1] "Current row: 23"
## [1] "Current row: 24"
## [1] "Current row: 25"
## [1] "Current row: 26"
# Collapse each collector into a single tibble and add a `type` column
# recording whether the record came from the gas station or auto repair search
yelp_all_list1 <- yelp_all_list1 %>%
  bind_rows() %>%
  as_tibble() %>%
  mutate(type = "gas station")
yelp_all_list2 <- yelp_all_list2 %>%
  bind_rows() %>%
  as_tibble() %>%
  mutate(type = "auto repair")

# Stack both searches into one table
# NOTE(review): the search circles are larger than their tracts, so the same
# business may appear more than once here -- consider de-duplicating on `id`.
yelp_all <- bind_rows(yelp_all_list1, yelp_all_list2) %>%
  as_tibble()

# print
print(yelp_all, width = 1000)
## # A tibble: 653 × 19
##    id                     alias                              
##    <chr>                  <chr>                              
##  1 F7JWSELLVvDLBVBTVCmu0g circle-k-champaign                 
##  2 dY5bImdQbT_6V7_kPrn1HQ circle-k-champaign-10              
##  3 vyCTd76g2aGqWvY1229ofw cvs-pharmacy-champaign-3           
##  4 GKQWjnloUthLFGsSsrpZUw reynolds-towing-service-urbana-2   
##  5 lAoWkjChB2BkvPJXWD3AFA road-ready-truck-and-tire-champaign
##  6 EO6ZQJwWdzTbcL3WMrWs0w mobil-champaign-3                  
##  7 4sKzg4-WSkucvHsjre4wcw costco-champaign                   
##  8 7JtxfWCmrKFnozNGK8Gj7A shell-champaign                    
##  9 CI85fp3NLH0GGoUXBk3Gzw circle-k-champaign-7               
## 10 lVbQzYza0POviyTCBEnVjg mach-1-champaign-2                 
##    name                   
##    <chr>                  
##  1 Circle K               
##  2 Circle K               
##  3 CVS Pharmacy           
##  4 Reynolds Towing Service
##  5 Road Ready Truck & Tire
##  6 Mobil                  
##  7 Costco                 
##  8 Shell                  
##  9 Circle K               
## 10 Mach 1                 
##    image_url                                                             
##    <chr>                                                                 
##  1 "https://s3-media4.fl.yelpcdn.com/bphoto/lODk-3lvG9jUxOiSQeSibQ/o.jpg"
##  2 "https://s3-media3.fl.yelpcdn.com/bphoto/RyxizO-1U5-1c0kg87T-pg/o.jpg"
##  3 "https://s3-media1.fl.yelpcdn.com/bphoto/7vRKcO6fIxSxaq7WKvroUw/o.jpg"
##  4 "https://s3-media3.fl.yelpcdn.com/bphoto/-6MytL-R4CMb1EJC7F_H6g/o.jpg"
##  5 ""                                                                    
##  6 ""                                                                    
##  7 "https://s3-media2.fl.yelpcdn.com/bphoto/7bee76lvKxAW6fJVMbUdDw/o.jpg"
##  8 ""                                                                    
##  9 ""                                                                    
## 10 "https://s3-media3.fl.yelpcdn.com/bphoto/EzzMPtxdTfbdIKZ4MpTU0g/o.jpg"
##    is_closed
##    <lgl>    
##  1 FALSE    
##  2 FALSE    
##  3 FALSE    
##  4 FALSE    
##  5 FALSE    
##  6 FALSE    
##  7 FALSE    
##  8 FALSE    
##  9 FALSE    
## 10 FALSE    
##    url                                                                          
##    <chr>                                                                        
##  1 https://www.yelp.com/biz/circle-k-champaign?adjust_creative=4ZMcoYhbkuS8Q062…
##  2 https://www.yelp.com/biz/circle-k-champaign-10?adjust_creative=4ZMcoYhbkuS8Q…
##  3 https://www.yelp.com/biz/cvs-pharmacy-champaign-3?adjust_creative=4ZMcoYhbku…
##  4 https://www.yelp.com/biz/reynolds-towing-service-urbana-2?adjust_creative=4Z…
##  5 https://www.yelp.com/biz/road-ready-truck-and-tire-champaign?adjust_creative…
##  6 https://www.yelp.com/biz/mobil-champaign-3?adjust_creative=4ZMcoYhbkuS8Q062L…
##  7 https://www.yelp.com/biz/costco-champaign?adjust_creative=4ZMcoYhbkuS8Q062Le…
##  8 https://www.yelp.com/biz/shell-champaign?adjust_creative=4ZMcoYhbkuS8Q062Le5…
##  9 https://www.yelp.com/biz/circle-k-champaign-7?adjust_creative=4ZMcoYhbkuS8Q0…
## 10 https://www.yelp.com/biz/mach-1-champaign-2?adjust_creative=4ZMcoYhbkuS8Q062…
##    review_count categories   rating coordinates$latitude $longitude transactions
##           <int> <list>        <dbl>                <dbl>      <dbl> <list>      
##  1            2 <df [2 × 2]>    2.5                 40.1      -88.2 <chr [0]>   
##  2            2 <df [2 × 2]>    1                   40.1      -88.2 <chr [0]>   
##  3           22 <df [3 × 2]>    2                   40.1      -88.2 <chr [1]>   
##  4           23 <df [1 × 2]>    3                   40.1      -88.2 <chr [0]>   
##  5            2 <df [3 × 2]>    3                   40.2      -88.2 <chr [0]>   
##  6            1 <df [1 × 2]>    1                   40.1      -88.2 <list [0]>  
##  7           19 <df [3 × 2]>    4                   40.1      -88.2 <list [0]>  
##  8            1 <df [2 × 2]>    1                   40.1      -88.3 <list [0]>  
##  9            1 <df [2 × 2]>    1                   40.1      -88.3 <list [0]>  
## 10            3 <df [2 × 2]>    2.3                 40.1      -88.3 <list [0]>  
##    price location$address1    $address2 $address3 $city     $zip_code $country
##    <chr> <chr>                <chr>     <chr>     <chr>     <chr>     <chr>   
##  1 $     1301 S Neil St       <NA>      <NA>      Champaign 61820     US      
##  2 <NA>  59 E Green St        ""        <NA>      Champaign 61820     US      
##  3 $$    107 West Green St    ""        ""        Champaign 61820     US      
##  4 <NA>  1417 W Kenyon Rd     ""        ""        Urbana    61801     US      
##  5 <NA>  51 E Leverett Rd     ""        ""        Champaign 61822     US      
##  6 <NA>  1503 N Neil St       ""        <NA>      Champaign 61820     US      
##  7 $$    2002 N Neil St       ""        <NA>      Champaign 61820     US      
##  8 <NA>  1406 N Prospect Ave  ""        <NA>      Champaign 61820     US      
##  9 <NA>  1511 N Prospect Ave  ""        <NA>      Champaign 61820     US      
## 10 $     902 W Bloomington Rd ""        ""        Champaign 61821     US      
##    $state $display_address phone        display_phone  distance business_hours
##    <chr>  <list>           <chr>        <chr>             <dbl> <list>        
##  1 IL     <chr [2]>        +12173985868 (217) 398-5868     639. <df [1 × 3]>  
##  2 IL     <chr [2]>        +12173448722 (217) 344-8722    1085. <df [1 × 3]>  
##  3 IL     <chr [2]>        +12173558123 (217) 355-8123     706. <df [1 × 3]>  
##  4 IL     <chr [2]>        +12173370913 (217) 337-0913    3630. <df [1 × 3]>  
##  5 IL     <chr [2]>        +12176432800 (217) 643-2800    8243. <df [1 × 3]>  
##  6 IL     <chr [2]>        +12173551872 (217) 355-1872     310. <df [1 × 3]>  
##  7 IL     <chr [2]>        +12176006546 (217) 600-6546    1353. <df [1 × 3]>  
##  8 IL     <chr [2]>        +12173984930 (217) 398-4930    1095. <df [1 × 3]>  
##  9 IL     <chr [2]>        +12173555271 (217) 355-5271    1205. <df [1 × 3]>  
## 10 IL     <chr [2]>        +12175008220 (217) 500-8220    1271. <df [1 × 3]>  
##    attributes$business_temp_closed $waitlist_reservation $menu_url $open24_hours
##    <lgl>                           <lgl>                 <chr>     <lgl>        
##  1 NA                              NA                    <NA>      NA           
##  2 NA                              NA                    <NA>      NA           
##  3 NA                              NA                    <NA>      NA           
##  4 NA                              NA                    <NA>      NA           
##  5 NA                              NA                    <NA>      NA           
##  6 NA                              NA                    <NA>      NA           
##  7 NA                              NA                    <NA>      NA           
##  8 NA                              NA                    <NA>      NA           
##  9 NA                              NA                    <NA>      NA           
## 10 NA                              NA                    <NA>      NA           
##    type       
##    <chr>      
##  1 gas station
##  2 gas station
##  3 gas station
##  4 gas station
##  5 gas station
##  6 gas station
##  7 gas station
##  8 gas station
##  9 gas station
## 10 gas station
## # ℹ 643 more rows
# Lift longitude / latitude out of the nested `coordinates` column, drop
# records missing either value, and convert to sf points in EPSG:4326
yelp_sf <- yelp_all %>%
  mutate(
    x = coordinates$longitude,
    y = coordinates$latitude
  ) %>%
  filter(!is.na(x), !is.na(y)) %>%
  st_as_sf(coords = c("x", "y"), crs = 4326)

Some results are not in our desired region. We keep only the results that fall within the Champaign census tracts.

# Put the tracts in the same CRS as the Yelp points (EPSG:4326)
tract_champaign_transformed <- st_transform(tract_champaign, crs = 4326)

# Dense logical matrix: one row per business, one column per tract
within_matrix <- st_within(yelp_sf, tract_champaign_transformed, sparse = FALSE)

# TRUE for a business that lies within at least one tract
within_vector <- rowSums(within_matrix) > 0

yelp_sf1 <- filter(yelp_sf, within_vector)
yelp_sf1
## Simple feature collection with 495 features and 19 fields
## Geometry type: POINT
## Dimension:     XY
## Bounding box:  xmin: -88.31897 ymin: 40.05884 xmax: -88.20487 ymax: 40.15636
## Geodetic CRS:  WGS 84
## # A tibble: 495 × 20
##    id       alias name  image_url is_closed url   review_count categories rating
##  * <chr>    <chr> <chr> <chr>     <lgl>     <chr>        <int> <list>      <dbl>
##  1 F7JWSEL… circ… Circ… "https:/… FALSE     http…            2 <df>          2.5
##  2 dY5bImd… circ… Circ… "https:/… FALSE     http…            2 <df>          1  
##  3 vyCTd76… cvs-… CVS … "https:/… FALSE     http…           22 <df>          2  
##  4 GKQWjnl… reyn… Reyn… "https:/… FALSE     http…           23 <df>          3  
##  5 EO6ZQJw… mobi… Mobil ""        FALSE     http…            1 <df>          1  
##  6 4sKzg4-… cost… Cost… "https:/… FALSE     http…           19 <df>          4  
##  7 7JtxfWC… shel… Shell ""        FALSE     http…            1 <df>          1  
##  8 CI85fp3… circ… Circ… ""        FALSE     http…            1 <df>          1  
##  9 lVbQzYz… mach… Mach… "https:/… FALSE     http…            3 <df>          2.3
## 10 Zyq6S_M… circ… Circ… "https:/… FALSE     http…            3 <df>          2.3
## # ℹ 485 more rows
## # ℹ 11 more variables: coordinates <df[,2]>, transactions <list>, price <chr>,
## #   location <df[,8]>, phone <chr>, display_phone <chr>, distance <dbl>,
## #   business_hours <list>, attributes <df[,4]>, type <chr>,
## #   geometry <POINT [°]>

There are a total of 495 gas station and auto repair records in Champaign.

# Number of business records that fall within the Champaign tracts
nrow(yelp_sf1)
## [1] 495

There are 354 auto repair shops and 141 gas stations in the Champaign area.

# Tally the within-Champaign records by search type
count(yelp_sf1, type)
## Simple feature collection with 2 features and 2 fields
## Geometry type: MULTIPOINT
## Dimension:     XY
## Bounding box:  xmin: -88.31897 ymin: 40.05884 xmax: -88.20487 ymax: 40.15636
## Geodetic CRS:  WGS 84
## # A tibble: 2 × 3
##   type            n                                                     geometry
## * <chr>       <int>                                             <MULTIPOINT [°]>
## 1 auto repair   354 ((-88.31897 40.11364), (-88.31738 40.11496), (-88.31222 40.…
## 2 gas station   141 ((-88.31525 40.11371), (-88.28559 40.11257), (-88.28643 40.…

Mapping out all gas stations and auto repairs in Champaign:

# Map: one dot per business, coloured by `type`; the popup shows the
# business name, review count and rating
tm_shape(yelp_sf1) +
  tm_dots(
    col = "type",
    style = "quantile",
    shapes.labels = "name",
    popup.vars = c(
      "Name" = "name",
      "Reviews" = "review_count",
      "Rating" = "rating"
    )
  )

Upon visual inspection, we can see that the gas stations and auto repair shops cluster along major highways, notably the north-south Neil Street and the east-west Springfield Avenue. There is also a significant cluster in the northern suburbs around Interstate 74 and the Towncenter Mall area.