### Loading the libraries required
suppressWarnings(suppressMessages({
library(tidycensus)
library(sf)
library(tidycensus)
library(sf)
library(tmap)
library(jsonlite)
library(tidyverse)
library(httr)
library(reshape2)
library(here)
library(yelpr)
library(knitr)
}))
### Loading the API keys
# The keys live in a separate environment file so they stay out of the script.
readRenviron("APIs.Renviron")
census_api <- Sys.getenv("census_api") # Census API key
yelp_api <- Sys.getenv("yelp_api") # Yelp API key
# Hand the Census key to tidycensus; its confirmation message is silenced.
suppressMessages(census_api_key(census_api))
### Getting the census tracts of Glynn County, GA (narrowed down to Brunswick below)
# Downloads 2022 ACS 5-year median household income (B19019_001) per tract,
# together with the tract geometries, then reprojects to WGS84.
tract <- st_transform(
  suppressMessages(
    get_acs(
      geography = "tract",
      state = "GA",
      county = "Glynn",
      variables = c(hhincome = "B19019_001"),
      year = 2022,
      survey = "acs5",
      geometry = TRUE,
      output = "wide",
      progress_bar = FALSE
    )
  ),
  4326 # Using WGS84 projection
)
### Marking Brunswick
# 13 is the state FIPS code for Georgia; keep only the place named Brunswick
# and reproject it to WGS84 so it matches the tracts.
Brunswick <- suppressMessages(
  st_transform(
    filter(tigris::places(13, progress_bar = FALSE), NAME == "Brunswick"),
    4326
  )
)
### Marking the census tracts of Brunswick
# Spatial subset: keeps the tracts selected by the Brunswick polygon.
tract_Brunswick <- tract[Brunswick, ]
### Viewing Brunswick and its census tracts
suppressMessages(tmap_mode("view")) # Setting to interactive viewing mode
tm_shape(tract_Brunswick) +
  tm_borders(lwd = 2) +
  tm_shape(Brunswick) +
  tm_polygons(col = "red", alpha = 0.2)
### The red polygon marks Brunswick. The black outlines are the census tracts selected for Brunswick.
### To find the businesses in Brunswick, we find the center of Brunswick, draw a circle that encompasses Brunswick, then select the businesses that are actually inside Brunswick.
### To find the radius of our circle, we use the following function:
get_r <- function(poly, epsg_id, pad = 1.05) {
  # Builds the center point and radius of a circle covering a geometry.
  #
  # Takes:
  #   poly    - a single POLYGON or LINESTRING
  #   epsg_id - EPSG code assigned to the points built from poly's bounding box
  #   pad     - multiplier applied to the radius to make the circle a bit
  #             larger than the bounding box (default 1.05)
  # Outputs: a one-row sf object holding the bounding-box center point and a
  #          `radius` column (center-to-corner distance multiplied by `pad`)
  stopifnot(is.numeric(pad), length(pad) == 1, pad > 0)

  # Get bounding box of the given geometry
  bb <- st_bbox(poly)

  # Lower-left corner of the bounding box (any corner is equally far from the center)
  bb_corner <- st_point(c(bb[["xmin"]], bb[["ymin"]])) %>%
    st_sfc(crs = epsg_id)

  # Center of the bounding box
  center_xy <- c(
    (bb[["xmax"]] + bb[["xmin"]]) / 2,
    (bb[["ymax"]] + bb[["ymin"]]) / 2
  )
  bb_center <- st_point(center_xy) %>%
    st_sfc(crs = epsg_id) %>%
    st_sf()

  # Distance between the corner and the center, padded by `pad`
  r <- st_distance(bb_corner, bb_center)
  bb_center$radius <- r * pad

  bb_center
}
epsg_id <- 4326
# get_r() takes a single geometry, so the first geometry of Brunswick is pulled
# out; the result is held in a one-element list and bound into one table.
radius <- list(get_r(st_geometry(Brunswick)[[1]], epsg_id = epsg_id))
radius <- bind_rows(radius)
### We put the X Y coordinates of the center point in separate columns.
radius_4_yelp <- mutate(
  radius,
  x = st_coordinates(radius)[, 1],
  y = st_coordinates(radius)[, 2]
)
### Now we can read the yelp data using the following function:
get_yelp <- function(tract, category) {
  # Downloads all Yelp businesses of one category inside a search circle.
  #
  # Takes:
  #   tract    - one-row table with columns `x` (passed as longitude), `y`
  #              (passed as latitude) and `radius` (rounded and passed as the
  #              search radius)
  #   category - value passed to the `categories` argument of business_search()
  # Outputs:
  #   A data frame of businesses with all result pages bound together.
  #   If the first response reports 1000 or more businesses, a notice is
  #   printed and a list is returned instead; only its first element (named
  #   with the number of pages that would be needed) is filled.
  # Errors if the first response has no `total` field (for example when the
  # API sent back an error object). Later pages that fail are skipped with a
  # warning. Reads the API key from the global `yelp_api`.
  page_size <- 50

  # Requests one page of results; `page` is 1-based.
  fetch_page <- function(page) {
    business_search(api_key = yelp_api,
                    categories = category,
                    latitude = tract$y,
                    longitude = tract$x,
                    offset = (page - 1) * page_size, # = 0 when page = 1
                    radius = round(tract$radius),
                    limit = page_size)
  }

  # Returns NULL for a usable response, otherwise a description of the problem.
  response_problem <- function(resp) {
    if (is.list(resp) && !is.null(resp[["total"]])) {
      return(NULL)
    }
    err <- if (is.list(resp)) resp[["error"]] else NULL
    detail <- if (is.list(err)) err[["description"]] else NULL
    paste0(
      "Yelp response has no `total` field",
      if (!is.null(detail)) paste0(": ", detail)
    )
  }

  Sys.sleep(1) # Pause code by 1 second

  # First request
  resp <- fetch_page(1)
  problem <- response_problem(resp)
  if (!is.null(problem)) {
    stop(problem, call. = FALSE)
  }

  # Calculate how many requests are needed in total
  required_n <- ceiling(resp$total / page_size)

  # out is where the results of each page are stored
  out <- vector("list", required_n)
  out[[1]] <- resp$businesses

  # Name the first element with required_n so a caller can see how many
  # pages the full result would have needed
  names(out)[1] <- required_n

  # Too many results: report it and hand back only the first page
  if (resp$total >= 1000) {
    print(glue::glue("There are more than 1000 businesses."))
    return(out)
  }

  # Remaining pages (nothing to do when required_n is 0 or 1)
  for (page in seq_len(required_n)[-1]) {
    resp <- fetch_page(page)
    problem <- response_problem(resp)
    if (!is.null(problem)) {
      warning("Page ", page, " of ", required_n, " skipped. ", problem,
              call. = FALSE)
      next
    }
    out[[page]] <- resp$businesses
  }

  # Merge all elements in the list into a single data frame
  bind_rows(out)
}
### Getting data of hotels and restaurants from yelp
# Each download is wrapped in a one-element list and bound into a single table.
hotels <- list(suppressMessages(get_yelp(radius_4_yelp[1, ], "hotels")))
hotels <- bind_rows(hotels)
restaurants <- list(suppressMessages(get_yelp(radius_4_yelp[1, ], "restaurants")))
restaurants <- bind_rows(restaurants)
### We use the following function to take the values in a column and concatenate them into a string separated by commas.
concate_list <- function(x) {
  # Takes: an object with a `title` element (e.g. a data frame or list)
  # Outputs: the `title` values joined into one string, separated by ", "
  str_c(x[["title"]], collapse = ", ")
}
### We drop duplicated businesses (rows sharing the same Yelp id).
hotels_unique <- hotels %>% distinct(id, .keep_all = TRUE)
restaurants_unique <- restaurants %>% distinct(id, .keep_all = TRUE)
### To get the coordinates of the businesses, we flatten the data to open up the coordinates column.
hotels_flat <- jsonlite::flatten(hotels_unique)
restaurants_flat <- jsonlite::flatten(restaurants_unique)
### Check whether any business has NA for its coordinates.
### This is done before the sf conversion because st_as_sf() stops with an error
### when the coordinate columns contain missing values.
hotels_na <- is.na(hotels_flat$coordinates.longitude) |
  is.na(hotels_flat$coordinates.latitude)
restaurants_na <- is.na(restaurants_flat$coordinates.longitude) |
  is.na(restaurants_flat$coordinates.latitude)
print(sprintf("There are %d hotels with NA values as coordinates.", sum(hotels_na)))
## [1] "There are 0 hotels with NA values as coordinates."
print(sprintf("There are %d restaurants with NA values as coordinates.", sum(restaurants_na)))
## [1] "There are 0 restaurants with NA values as coordinates."
### Drop businesses without coordinates, then change the data to an sf object.
hotels_sf <- hotels_flat[!hotels_na, , drop = FALSE] %>%
  st_as_sf(coords = c("coordinates.longitude", "coordinates.latitude"), crs = 4326)
restaurants_sf <- restaurants_flat[!restaurants_na, , drop = FALSE] %>%
  st_as_sf(coords = c("coordinates.longitude", "coordinates.latitude"), crs = 4326)
### Finally, we keep only the businesses located within Brunswick.
# Step 1: spatial subset by the Brunswick polygon.
# Step 2: spatial join that attaches Brunswick's attributes to each business.
hotels_criteria <- hotels_sf[Brunswick, ]
hotels_Brunswick <- hotels_criteria %>%
  st_join(Brunswick, join = st_intersects)
restaurants_criteria <- restaurants_sf[Brunswick, ]
restaurants_Brunswick <- restaurants_criteria %>%
  st_join(Brunswick, join = st_intersects)
### With the location of every business known, we can plot them on a map.
tm_shape(hotels_Brunswick) +
  tm_dots(col = "blue") + # Blue shows the hotels
  tm_shape(restaurants_Brunswick) +
  tm_dots(col = "red") + # Red shows the restaurants
  tm_shape(Brunswick) +
  tm_borders()
### Discussion:
# 1. The city I chose was Brunswick, located in southern Georgia.
# 2. The number of hotels in Brunswick is 9, which is the number of rows in hotels_Brunswick since each row represents a hotel. The number of restaurants in Brunswick is 54 and can be verified using the same method. The total number of businesses is 63.
nrow(hotels_Brunswick)
## [1] 9
nrow(restaurants_Brunswick)
## [1] 54
# 3. The map demonstrates that there are two main clusters of restaurants, one located in the north of Brunswick and one in the south. Most of the other restaurants are located along two streets.
# 4. The number of hotels is 9, but the number of hotel dots on the map is 8. This suggests that two hotels have the same location.
hotels_Brunswick$location.address1
## [1] "825 Egmont St" "1505 Newcastle St" "728 Union St"
## [4] "2715 Glynn Ave" "2697 Glynn Ave" "3243 Glynn Ave"
## [7] "1001 Egmont St" "3241 Glynn Ave" "3243 Glynn Ave"
# Hotels No. 6 and No. 9 have the same address, so let's look at their names.
hotels_Brunswick$name[6]
## [1] "Country Hearth Inn & Suites"
hotels_Brunswick$name[9]
## [1] "Ras 2005 Inc Adsl Line"
# With a bit of searching, it seems that Hotel No. 6 (Country Hearth Inn & Suites) is out of business and Hotel No. 9 (Ras 2005 Inc Adsl Line) is not a real hotel entry, so to update the map we remove these 2 businesses.
# They are removed by name rather than by row position, because the row order depends on what the Yelp API returns.
not_hotels <- c("Country Hearth Inn & Suites", "Ras 2005 Inc Adsl Line")
hotels_Brunswick <- hotels_Brunswick[!hotels_Brunswick$name %in% not_hotels, ]
# The actual number of hotels in Brunswick is 7.
# To check whether restaurants in Brunswick have the same problem, we look for addresses that occur more than once.
same_restaurants <- restaurants_Brunswick$location.address1 %>%
  .[duplicated(.)]
same_restaurants
## character(0)
# No address is repeated, so we assume that each restaurant is a distinct business. Therefore, the actual number of hotels and restaurants is 7 and 54, respectively, and the total number of businesses is 61.
# Location of actual businesses
tm_shape(hotels_Brunswick) +
  tm_dots(col = "blue") + # Blue shows the hotels
  tm_shape(restaurants_Brunswick) +
  tm_dots(col = "red") + # Red shows the restaurants
  tm_shape(Brunswick) +
  tm_borders()