library(tidyverse)
library(tidycensus)
library(sf)
library(tmap)
library(jsonlite)
library(httr)
library(jsonlite)
library(reshape2)
library(here)
library(yelpr)
library(knitr)
library(tigris)
# Accessing Census Data
tract_c <- suppressMessages(
get_acs(geography = "tract",
state = "GA",
county = c("Fulton", "Dekalb"),
variables = c(hhincome = 'B19019_001',
trans.total = "B08006_001",
trans.car = "B08006_002",
trans.pubtrans = "B08006_008",
trans.bicycle = "B08006_014",
trans.walk = "B08006_015",
trans.WfH = "B08006_017"),
year = 2019,
survey = "acs5", # American Community Survey 5-year estimate
geometry = TRUE, # returns sf objects
output = "wide") # wide vs. long
)
# Renaming variables
tract1 <- tract_c %>%
select(GEOID,
hhincome = hhincomeE,
trans.total = trans.totalE,
trans.car = trans.carE,
trans.pubtrans = trans.pubtransE,
trans.bicycle = trans.bicycleE,
trans.walk = trans.walkE,
trans.WfH = trans.WfHE)
# Selecting tracts in Atlanta
atlanta <- places('GA') %>%
filter(NAME == 'Atlanta')
## Retrieving data for the year 2021
tract <- tract1[atlanta,]
# Visualising census data
tmap_mode("view")
## tmap mode set to interactive viewing
tm_shape(tract) + tm_polygons("trans.bicycle") + tm_shape(atlanta) + tm_borders(col="blue")
# Using custom function to get a radius around a census tract
get_r <- function(poly, epsg_id){
# Getting a bounding box for a polygon
bb <- st_bbox(poly)
# Getting the lat & long coordinates of any one corner of the bounding box.
bb_corner <- st_point(c(bb[1], bb[2])) %>% st_sfc(crs = epsg_id)
# Getting centroid of the bb
bb_center_x <- (bb[3]+bb[1])/2
bb_center_y <- (bb[4]+bb[2])/2
bb_center <- st_point(c(bb_center_x, bb_center_y)) %>% st_sfc(crs = epsg_id) %>% st_sf()
# Getting the distance between bb_p and c
r <- st_distance(bb_corner, bb_center)
# Multiplying 1.1 to make the circle a bit larger than the Census Tract.
bb_center$radius <- r*1.2
return(bb_center)
}
# Using a loop to get a radius around all census tracts
epsg_id <- 4326
r4all_loop <- vector("list", nrow(tract))
# Starting a for-loop
for (i in 1:nrow(tract)){
r4all_loop[[i]] <- tract %>%
st_transform(crs = epsg_id) %>%
st_geometry() %>%
.[[i]] %>%
get_r(epsg_id = epsg_id)
}
r4all_loop <- bind_rows(r4all_loop)
r4all_apply <- tract %>%
st_geometry() %>%
st_transform(crs = epsg_id) %>%
lapply(., function(x) get_r(x, epsg_id = epsg_id))
r4all_apply <- bind_rows(r4all_apply)
identical(r4all_apply, r4all_loop)
ready_4_yelp <- r4all_apply %>%
mutate(x = st_coordinates(.)[,1],
y = st_coordinates(.)[,2])
tmap_mode('view')
## tmap mode set to interactive viewing
ready_4_yelp[1:10,] %>%
st_buffer(., dist = .$radius) %>%
tm_shape(.) + tm_polygons(alpha = 0.5, col = 'red') +
tm_shape(tract[1:10,]) + tm_borders(col= 'blue')
# A] Selected business category - Bike Rental
# Using Yelp Business Search function
which_tract <- 1
test <- business_search(api_key = Sys.getenv('yelp_api'), # like we did for census, store your api key
categories = 'bikerentals', # return only restaurant businesses
latitude = ready_4_yelp$y[which_tract],
longitude = ready_4_yelp$x[which_tract],
offset = 0, # 1st page, 1st obs
radius = round(ready_4_yelp$radius[which_tract]), # radius requires integer value
limit = 50) # how many business per page
## No encoding supplied: defaulting to UTF-8.
lapply(test, head)
names(test)
# Finding number of businesses
paste0("is it a data.frame?: ", is.data.frame(test$businesses), ", ",
" how many rows?: ", nrow(test$businesses), ", ",
" how many columns?: ", ncol(test$businesses))
# Applying a custom function to getting business data for census tracts
get_yelp <- function(tract, category){
Sys.sleep(1)
n <- 1
# Making the first request
resp <- business_search(api_key = Sys.getenv("yelp_api"),
categories = category,
latitude = tract$y,
longitude = tract$x,
offset = (n - 1) * 50, # = 0 when n = 1
radius = round(tract$radius),
limit = 50)
# Calculating how many requests are needed in total
required_n <- ceiling(resp$total/50)
out <- vector("list", required_n)
out[[n]] <- resp$businesses
names(out)[n] <- required_n
if (resp$total >= 1000)
{
print(glue::glue("{n}th row has >= 1000 businesses."))
return(out)
}
else
{
n <- n + 1
# Starting a loop
while(n <= required_n){
resp <- business_search(api_key = Sys.getenv("yelp_api"),
categories = category,
latitude = tract$y,
longitude = tract$x,
offset = (n - 1) * 50,
radius = round(tract$radius),
limit = 50)
out[[n]] <- resp$businesses
n <- n + 1
}
# Merging all elements in the list into a single data frame
out <- out %>% bind_rows()
return(out)
}
}
yelp_first_tract <- get_yelp(ready_4_yelp[1,], 'bikerentals') %>% as_tibble()
## No encoding supplied: defaulting to UTF-8.
yelp_first_tract %>% print
# Preparing a collector
yelp_all_list <- vector("list", nrow(ready_4_yelp))
# Looping through all Census Tracts
for (row in 1:nrow(ready_4_yelp)){
yelp_all_list[[row]] <- suppressMessages(get_yelp(ready_4_yelp[row,], "bikerentals"))
if (row %% 10 == 0){
print(paste0("Current row: ", row))
}
}
# Collapsing the list into a data.frame
yelp_all <- yelp_all_list %>% bind_rows() %>% as_tibble()
yelp_all %>% print(width=1000)
# Extracting coordinates
yelp_sf <- yelp_all %>%
mutate(x = .$coordinates$longitude,
y = .$coordinates$latitude) %>%
filter(!is.na(x) & !is.na(y)) %>%
st_as_sf(coords = c("x", "y"), crs = 4326)
# Deleting duplicate rows
yelp_unique <- yelp_sf %>%
distinct(id, .keep_all=T)
glue::glue("Before dropping duplicated rows, there were {nrow(yelp_sf)} rows. After dropping them, there are {nrow(yelp_unique)} rows") %>%
print()
# Flattening nested categories column
concate_list <- function(x){
titles <- x[["title"]] %>% str_c(collapse = ", ")
return(titles)
}
yelp_flat <- yelp_unique %>%
mutate(categories = categories %>% map_chr(concate_list))
yelp_flat %>% print(width = 1000)
# Deleting rows outside Atlanta
st_crs(tract) <- 4326 # making sure both the tracts and yelp data have the same CRS
## Warning: st_crs<- : replacing crs does not reproject data; use st_transform for
## that
# Converting to sf
yelp_sfc <- yelp_flat %>%
st_as_sf(coords=c("coordinates.longitude", "coordinates.latitude"), crs = 4326)
# subsetting
yelp_in <- yelp_sfc[tract %>% st_union(), ,op = st_intersects]
# Joining Census and Yelp Data
yelp_in_tract <- st_join(tract, yelp_in, join = st_intersects)
# Getting count of Bike Rentals in each tract
yelp_count_tract <- count(as_tibble(yelp_in_tract), GEOID) %>%
print()
test <- st_join(tract, yelp_in %>% mutate(count = 1))
out <- test %>%
group_by(GEOID) %>%
summarise(count = sum(count, na.rm = T))
tract_count_join <- tract %>%
left_join(out %>% st_set_geometry(NULL), by = "GEOID")
The main purpose of this exercise is to find suitable areas to locate a bike rental store by analysing the gap between bike stores and tracts with bike commuters. The data tidied above was examined below using map and graphs.
# A new variable was introduced for identifying which tracts have more bike commuters. The variable introduced considered the proportion of bike commuters among all commuters in a given tract. This variable provides a better representation on the concentration/demand of bike commuters as compared to absolute numbers.
# Adding new variable
yelp_in_tract1 <- mutate(yelp_in_tract, proportion_bikers = (trans.bicycle/trans.total)*100)
# Mapping
tmap_mode("view")
## tmap mode set to interactive viewing
tm_shape(yelp_in_tract1) + tm_polygons("proportion_bikers") + tm_shape(atlanta) + tm_borders(col="blue") + tm_shape(yelp_in) + tm_dots()
# Ideally, a bike rental will have more deman in lower income tracts that have low car ownership. Therefore this graph visualises the distribution of existing bike rental stores in relation to the meam household income
ggplot(tract_count_join, aes(x=hhincome, y=count)) +
geom_point() +
ylab("Number of Bike rentals and Household Median Income in Tract")
## Warning: Removed 3 rows containing missing values (`geom_point()`).
# Lastly this graph depicts the distance to the nearest bike rental store for each census tract
dist <- sf::st_distance(yelp_in_tract1, yelp_in)
hist(dist)
Proportion of bikers and location of existing bike rental stores - the map shows that all the existing bike rental stores were in close proximity to census tracts having a high proportion of bikers [6-8%].
HH income and distribution of existing bike rental stores - the graph shows that majority of the stores were in tracts with mean HH income [between USD 50,000 to 100,000] and emphasises the lack of stores in lower and very high income neighborhoods.
Distance to the nearest bike rental store for each census tract - indicates that majority of the census tracts are withing a distance of 2 - 6 miles form any of the existing bike rental stores.