# Setup ----
# Register the Census API key for this session; the key is read from the
# `census_api` environment variable.
tidycensus::census_api_key(Sys.getenv("census_api"))
# yelpr is installed from GitHub. Install it only when it is not available yet,
# so re-running the script does not need the network or reinstall the package
# on every run.
if (!requireNamespace("yelpr", quietly = TRUE)) {
  devtools::install_github("OmaymaS/yelpr")
}
library(tigris)
library(tidycensus)
library(sf)
library(tmap)
library(jsonlite)
library(tidyverse)
library(httr)
library(reshape2)
library(here)
library(yelpr)
library(knitr)
# Census tracts for Boulder County, CO ----
# Download 2019 ACS 5-year estimates per tract, with geometry attached.
# The names given in `variables` become the column prefixes of the wide output.
tract <- suppressMessages(
  get_acs(
    geography = "tract",
    variables = c(
      hhincome = "B19019_001",
      race.tot = "B02001_001",
      race.white = "B02001_002",
      race.black = "B02001_003"
    ),
    year = 2019,
    survey = "acs5",   # American Community Survey 5-year estimate
    state = "CO",
    county = "Boulder",
    output = "wide",   # one row per tract instead of one row per variable
    geometry = TRUE    # return an sf object
  )
)
# Preview the first rows as a table.
knitr::kable(head(tract))
| GEOID | NAME | hhincomeE | hhincomeM | race.totE | race.totM | race.whiteE | race.whiteM | race.blackE | race.blackM | geometry |
|---|---|---|---|---|---|---|---|---|---|---|
| 08013012607 | Census Tract 126.07, Boulder County, Colorado | 23699 | 4974 | 5926 | 655 | 4873 | 545 | 124 | 99 | MULTIPOLYGON (((-105.2586 4… |
| 08013012104 | Census Tract 121.04, Boulder County, Colorado | 121658 | 10791 | 2557 | 205 | 1980 | 208 | 0 | 12 | MULTIPOLYGON (((-105.297 40… |
| 08013013505 | Census Tract 135.05, Boulder County, Colorado | 47004 | 4698 | 4269 | 438 | 3863 | 446 | 33 | 35 | MULTIPOLYGON (((-105.1024 4… |
| 08013013308 | Census Tract 133.08, Boulder County, Colorado | 52031 | 6746 | 3492 | 235 | 3227 | 266 | 40 | 50 | MULTIPOLYGON (((-105.1168 4… |
| 08013013305 | Census Tract 133.05, Boulder County, Colorado | 67835 | 5255 | 5425 | 298 | 4766 | 396 | 58 | 64 | MULTIPOLYGON (((-105.1311 4… |
| 08013013602 | Census Tract 136.02, Boulder County, Colorado | 101071 | 17804 | 867 | 160 | 832 | 154 | 2 | 5 | MULTIPOLYGON (((-105.6776 4… |
# Base map ----
# Put tmap into its "view" mode, then build and show a layer that outlines
# every tract in blue. `map` is kept so later layers can be added on top.
tmap_mode(mode = "view")
map <- tm_shape(shp = tract) +
  tm_borders(col = "blue")
map
# get_r(): search circle (centre point + radius) for one geometry.
#
# Arguments:
#   poly          - a single POLYGON or LINESTRING.
#   epsg_id       - EPSG code assigned to the points built from the bounding
#                   box; it should be the CRS the coordinates of `poly` are in.
#   buffer_factor - multiplier applied to the centre-to-corner distance so the
#                   circle is a bit larger than the geometry (default 1.2).
#                   See the Yelp explanation of their radius parameter to see
#                   why we do this.
#
# Returns:
#   A one-row sf object whose geometry is the centre of the bounding box and
#   whose `radius` column is the distance between that centre and a corner of
#   the bounding box, multiplied by `buffer_factor`. The distance is whatever
#   st_distance() reports for `epsg_id`.
get_r <- function(poly, epsg_id, buffer_factor = 1.2) {
  # Bounding box of the geometry; elements 1-4 are xmin, ymin, xmax, ymax.
  bb <- st_bbox(poly)

  # Any corner works because every corner is equally far from the centre;
  # use (xmin, ymin).
  bb_corner <- st_point(c(bb[1], bb[2])) %>%
    st_sfc(crs = epsg_id)

  # Centre of the bounding box, as an sf object so a column can be attached.
  bb_center_x <- (bb[3] + bb[1]) / 2
  bb_center_y <- (bb[4] + bb[2]) / 2
  bb_center <- st_point(c(bb_center_x, bb_center_y)) %>%
    st_sfc(crs = epsg_id) %>%
    st_sf()

  # Distance between the corner and the centre of the bounding box.
  r <- st_distance(bb_corner, bb_center)

  # Enlarge the radius so the circle is a bit larger than the geometry.
  bb_center$radius <- r * buffer_factor
  bb_center
}
# Search circles for every Census Tract ----
# All geometries are transformed to this CRS before get_r() is applied.
epsg_id <- 4326

# Apply get_r() to each tract geometry with lapply(); the result is a list
# with one single-row sf object per tract.
r4all_apply <- lapply(
  st_transform(st_geometry(tract), crs = epsg_id),
  get_r,
  epsg_id = epsg_id
)
# Stack the per-tract results into one sf object.
r4all_apply <- bind_rows(r4all_apply)

# Expose the centre coordinates as plain columns.
ready_4_yelp <- mutate(
  r4all_apply,
  x = st_coordinates(r4all_apply)[, 1],
  y = st_coordinates(r4all_apply)[, 2]
)

# Visual check: buffer each centre by its own radius (red) and overlay the
# original tract boundaries (blue).
tm_shape(st_buffer(ready_4_yelp, dist = ready_4_yelp$radius)) +
  tm_polygons(alpha = 0.3, col = "red") +
  tm_shape(tract) +
  tm_borders(col = "blue")
# FUNCTION
# get_yelp(): download all Yelp businesses of one category around one tract.
#
# Arguments:
#   tract    - one row of tract information (1,) providing `x`, `y` and
#              `radius`; `y`/`x` are sent as latitude/longitude and `radius`
#              is rounded before being sent.
#   category - category name (str) sent as `categories` in EVERY request,
#              e.g. "restaurants" or "food".
#
# Returns:
#   A data frame with the businesses from all pages bound together.
#   If the search reports 1000 or more businesses, no further pages are
#   requested and a list is returned instead: its first element is the first
#   page and the name of that element is the number of pages that would have
#   been required. The tract then needs to be broken down into something
#   smaller.
#
# Stops with an error when the response carries no usable `total`.
get_yelp <- function(tract, category) {
  # Pause before querying so consecutive tracts are spaced out.
  Sys.sleep(1)

  # Number of businesses requested per page.
  page_size <- 50

  # Single place that talks to the API, so the first request and the paging
  # requests cannot drift apart (the category used to be hard-coded to
  # "restaurants" in the first request only).
  fetch_page <- function(offset) {
    business_search(
      api_key = Sys.getenv("yelp_api"),
      categories = category,
      latitude = tract$y,
      longitude = tract$x,
      offset = offset,
      radius = round(tract$radius),
      limit = page_size
    )
  }

  n <- 1

  # First request --------------------------------------------------------------
  resp <- fetch_page((n - 1) * page_size) # offset = 0 when n = 1

  total <- as.numeric(resp$total)
  if (length(total) != 1 || is.na(total)) {
    stop(
      "Yelp response has no usable `total`; ",
      "check the API key, quota and request parameters.",
      call. = FALSE
    )
  }

  # Calculate how many requests are needed in total.
  required_n <- ceiling(total / page_size)

  # out is where the results will be appended to.
  out <- vector("list", required_n)
  out[[n]] <- resp$businesses

  # Name the first element with required_n, so that if there are 1000 or more
  # businesses we know how many pages there would have been.
  names(out)[n] <- required_n

  # Too many businesses: report it and stop before going into the loop.
  if (total >= 1000) {
    # glue formats the string by inserting the values of the {expressions}.
    print(glue::glue(
      "Search for '{category}' reports {total} businesses (>= 1000); ",
      "returning the first page only."
    ))
    return(out)
  }

  # Now we know required_n -----------------------------------------------------
  n <- n + 1
  while (n <= required_n) {
    out[[n]] <- fetch_page((n - 1) * page_size)$businesses
    n <- n + 1
  }

  # Merge all elements in the list into a single data frame.
  bind_rows(out)
}
# Collect Yelp businesses for every Census Tract ----
# Create a copy of the ready_4_yelp variable to use for the 'food' category.
ready_4_yelp_food <- ready_4_yelp

# Prepare one collector per category, with one slot per tract.
yelp_all_list <- vector("list", length = nrow(ready_4_yelp))
yelp_all_list_food <- vector("list", length = nrow(ready_4_yelp))

# Loop through all Census Tracts. seq_len() makes the loop run zero times when
# there are no tracts, whereas 1:nrow() would iterate over 1 and 0.
for (row in seq_len(nrow(ready_4_yelp))) {
  yelp_all_list[[row]] <- suppressMessages(
    get_yelp(ready_4_yelp[row, ], "restaurants")
  )
  yelp_all_list_food[[row]] <- suppressMessages(
    get_yelp(ready_4_yelp_food[row, ], "food")
  )
}

# Collapse each list into a single tibble and label it with its category.
yelp_restaurants <- yelp_all_list %>% bind_rows() %>% as_tibble()
yelp_restaurants$business_category <- "Restaurants"
yelp_food <- yelp_all_list_food %>% bind_rows() %>% as_tibble()
yelp_food$business_category <- "Food"

# Combine both categories, restaurants first.
combined_yelp <- rbind(yelp_restaurants, yelp_food)
# Count unique businesses ----
# Neighbouring search circles overlap, so the same business can be returned
# for several tracts. A row is dropped when its `name` has already appeared
# earlier in the same table; the first occurrence is kept.
# NOTE(review): matching on `name` also collapses different businesses that
# share a name (e.g. several branches of one chain) - confirm this is intended.

# Restaurants
restaurants_no_duplicates <- yelp_restaurants %>%
  filter(!duplicated(name))
restaurant_count <- nrow(restaurants_no_duplicates)

# Food
food_no_duplicates <- yelp_food %>%
  filter(!duplicated(name))
food_count <- nrow(food_no_duplicates)

print(paste("The number of businesses under the category 'restaurant' in Boulder is:", restaurant_count))
## [1] "The number of businesses under the category 'restaurant' in Boulder is: 859"
print(paste("The number of businesses under the category 'food' in Boulder is:", food_count))
## [1] "The number of businesses under the category 'food' in Boulder is: 756"

# Both categories together; a name found in both tables is kept once.
yelp_no_duplicates <- combined_yelp %>%
  filter(!duplicated(name))
all_count <- nrow(yelp_no_duplicates)
print(paste("The number of businesses under both categories in Boulder is:", all_count))
## [1] "The number of businesses under both categories in Boulder is: 1363"
# Turn the businesses into points ----
# Pull longitude/latitude out of the nested `coordinates` column, drop rows
# with a missing coordinate, and build an sf point layer in the CRS given by
# epsg_id.
yelp_sf <- yelp_no_duplicates %>%
  mutate(
    x = coordinates$longitude,
    y = coordinates$latitude
  ) %>%
  filter(!is.na(x), !is.na(y)) %>%
  st_as_sf(coords = c("x", "y"), crs = epsg_id)

# Map: tract outlines plus one bubble per business, coloured by rating.
# The business name is used as the id, and the popup lists name, review count
# and rating.
new_map <- map +
  tm_shape(yelp_sf) +
  tm_bubbles(
    col = "rating",
    id = "name",
    popup.vars = c("name", "review_count", "rating")
  )
new_map
Boulder, CO.
There are 1363 businesses in total.
There are 859 restaurants and 756 food businesses after removing duplicates.
There is a large cluster of businesses around downtown Boulder (Pearl Street), which was expected. There also seems to be a good number of businesses in Longmont, which is an area that has seen some rapid growth in recent years.
It was surprising to see that there are a few businesses up in the mountains.