Mini Assignment 3

library(tidyverse)
library(tidycensus)
library(sf)
library(tmap)
library(jsonlite)
library(httr)
library(jsonlite)
library(reshape2)
library(here)
library(yelpr)
library(knitr)
library(tigris)

Downloading Census Data

# Accessing Census Data
tract_c <- suppressMessages(
  get_acs(geography = "tract",
          state = "GA",
          county = c("Fulton", "Dekalb"),
          variables = c(hhincome = 'B19019_001',
                        trans.total = "B08006_001",
                        trans.car = "B08006_002",
                        trans.pubtrans = "B08006_008", 
                        trans.bicycle = "B08006_014",
                        trans.walk = "B08006_015",
                        trans.WfH = "B08006_017"),
          year = 2019,
          survey = "acs5", # American Community Survey 5-year estimate
          geometry = TRUE, # returns sf objects
          output = "wide") # wide vs. long
)

# Renaming variables
tract1 <- tract_c %>%
  select(GEOID,
         hhincome = hhincomeE, 
         trans.total = trans.totalE,
         trans.car = trans.carE,
         trans.pubtrans = trans.pubtransE,
         trans.bicycle = trans.bicycleE,
         trans.walk = trans.walkE,
         trans.WfH = trans.WfHE)

# Selecting tracts in Atlanta
atlanta <- places('GA') %>% 
  filter(NAME == 'Atlanta')

## Retrieving data for the year 2021

tract <- tract1[atlanta,]

# Visualising census data
tmap_mode("view")

## tmap mode set to interactive viewing

tm_shape(tract) + tm_polygons("trans.bicycle") + tm_shape(atlanta) + tm_borders(col="blue")

Downloading Yelp Data

# Using custom function to get a radius around a census tract
get_r <- function(poly, epsg_id){

  # Getting a bounding box for a polygon
  bb <- st_bbox(poly)
  # Getting the lat & long coordinates of any one corner of the bounding box.
  bb_corner <- st_point(c(bb[1], bb[2])) %>% st_sfc(crs = epsg_id)
  # Getting centroid of the bb
  bb_center_x <- (bb[3]+bb[1])/2
  bb_center_y <- (bb[4]+bb[2])/2
  bb_center <- st_point(c(bb_center_x, bb_center_y)) %>% st_sfc(crs = epsg_id) %>% st_sf()
  
  # Getting the distance between bb_p and c
  r <- st_distance(bb_corner, bb_center)
  # Multiplying 1.1 to make the circle a bit larger than the Census Tract.
  bb_center$radius <- r*1.2
  return(bb_center)
}

# Using a loop to get a radius around all census tracts

epsg_id <- 4326

r4all_loop <- vector("list", nrow(tract))

# Starting a for-loop

for (i in 1:nrow(tract)){
  r4all_loop[[i]] <- tract %>% 
    st_transform(crs = epsg_id) %>% 
    st_geometry() %>% 
    .[[i]] %>% 
    get_r(epsg_id = epsg_id)
}

r4all_loop <- bind_rows(r4all_loop)

r4all_apply <- tract %>%
  st_geometry() %>% 
  st_transform(crs = epsg_id) %>% 
  lapply(., function(x) get_r(x, epsg_id = epsg_id))

r4all_apply <- bind_rows(r4all_apply)

identical(r4all_apply, r4all_loop)

ready_4_yelp <- r4all_apply %>% 
  mutate(x = st_coordinates(.)[,1],
         y = st_coordinates(.)[,2])

tmap_mode('view')

## tmap mode set to interactive viewing

ready_4_yelp[1:10,] %>% 
  st_buffer(., dist = .$radius) %>% 
  tm_shape(.) + tm_polygons(alpha = 0.5, col = 'red') +
  tm_shape(tract[1:10,]) + tm_borders(col= 'blue')

# A] Selected business category - Bike Rental
# Using Yelp Business Search function
which_tract <- 1
test <- business_search(api_key = Sys.getenv('yelp_api'), # like we did for census, store your api key
                        categories = 'bikerentals', # return only restaurant businesses
                        latitude = ready_4_yelp$y[which_tract],
                        longitude = ready_4_yelp$x[which_tract],
                        offset = 0, # 1st page, 1st obs
                        radius = round(ready_4_yelp$radius[which_tract]), # radius requires integer value
                        limit = 50) # how many business per page

## No encoding supplied: defaulting to UTF-8.

lapply(test, head)

names(test)

# Finding number of businesses
paste0("is it a data.frame?: ", is.data.frame(test$businesses), ", ",
       " how many rows?: ", nrow(test$businesses), ", ",
       " how many columns?: ", ncol(test$businesses))

# Applying a custom function to getting business data for census tracts
get_yelp <- function(tract, category){
  Sys.sleep(1)
  n <- 1
  # Making the first request
  resp <- business_search(api_key = Sys.getenv("yelp_api"), 
                          categories = category, 
                          latitude = tract$y, 
                          longitude = tract$x, 
                          offset = (n - 1) * 50, # = 0 when n = 1
                          radius = round(tract$radius), 
                          limit = 50)
  # Calculating how many requests are needed in total
  required_n <- ceiling(resp$total/50)
  
  out <- vector("list", required_n)

  out[[n]] <- resp$businesses

  names(out)[n] <- required_n
  
  if (resp$total >= 1000)
  {
    print(glue::glue("{n}th row has >= 1000 businesses."))
    return(out)
  } 
  else 
  {
    n <- n + 1
    # Starting a loop
    while(n <= required_n){
      resp <- business_search(api_key = Sys.getenv("yelp_api"), 
                              categories = category, 
                              latitude = tract$y, 
                              longitude = tract$x, 
                              offset = (n - 1) * 50, 
                              radius = round(tract$radius), 
                              limit = 50)
      
      out[[n]] <- resp$businesses
      
      n <- n + 1
    } 
    # Merging all elements in the list into a single data frame
    out <- out %>% bind_rows()
    
    return(out)
  }
}
yelp_first_tract <- get_yelp(ready_4_yelp[1,], 'bikerentals') %>% as_tibble()

## No encoding supplied: defaulting to UTF-8.

yelp_first_tract %>% print

# Preparing a collector
yelp_all_list <- vector("list", nrow(ready_4_yelp))

# Looping through all Census Tracts
for (row in 1:nrow(ready_4_yelp)){
  yelp_all_list[[row]] <- suppressMessages(get_yelp(ready_4_yelp[row,], "bikerentals"))
  if (row %% 10 == 0){
    print(paste0("Current row: ", row))
  }
}

# Collapsing the list into a data.frame
yelp_all <- yelp_all_list %>% bind_rows() %>% as_tibble()

yelp_all %>% print(width=1000)

# Extracting coordinates
yelp_sf <- yelp_all %>% 
  mutate(x = .$coordinates$longitude,
         y = .$coordinates$latitude) %>% 
  filter(!is.na(x) & !is.na(y)) %>% 
  st_as_sf(coords = c("x", "y"), crs = 4326)

Tidying Data

# Deleting duplicate rows
yelp_unique <- yelp_sf %>% 
  distinct(id, .keep_all=T)
glue::glue("Before dropping duplicated rows, there were {nrow(yelp_sf)} rows. After dropping them, there are {nrow(yelp_unique)} rows") %>% 
  print()

# Flattening nested categories column
concate_list <- function(x){
  titles <- x[["title"]] %>% str_c(collapse = ", ")
  return(titles)
}
yelp_flat <- yelp_unique %>% 
  mutate(categories = categories %>% map_chr(concate_list))
yelp_flat %>% print(width = 1000)

# Deleting rows outside Atlanta 
st_crs(tract) <- 4326 # making sure both the tracts and yelp data have the same CRS

## Warning: st_crs<- : replacing crs does not reproject data; use st_transform for
## that

# Converting to sf 
yelp_sfc <- yelp_flat %>% 
  st_as_sf(coords=c("coordinates.longitude", "coordinates.latitude"), crs = 4326)
# subsetting
yelp_in <- yelp_sfc[tract %>% st_union(), ,op = st_intersects]

Appending Census Data

# Joining Census and Yelp Data
yelp_in_tract <- st_join(tract, yelp_in, join = st_intersects)

# Getting count of Bike Rentals in each tract
yelp_count_tract <- count(as_tibble(yelp_in_tract), GEOID) %>%
  print()

test <- st_join(tract, yelp_in %>% mutate(count = 1))
out <- test %>%
  group_by(GEOID) %>%
  summarise(count = sum(count, na.rm = T))

tract_count_join <- tract %>%
  left_join(out %>% st_set_geometry(NULL), by = "GEOID")

Examining the Data

The main purpose of this exercise is to find suitable areas to locate a bike rental store by analysing the gap between bike stores and tracts with bike commuters. The data tidied above was examined below using map and graphs.

# A new variable was introduced for identifying which tracts have more bike commuters. The variable introduced considered the proportion of bike commuters among all commuters in a given tract. This variable provides a better representation on the concentration/demand of bike commuters as compared to absolute numbers. 

# Adding new variable 
yelp_in_tract1 <- mutate(yelp_in_tract, proportion_bikers = (trans.bicycle/trans.total)*100)

# Mapping
tmap_mode("view")

## tmap mode set to interactive viewing

tm_shape(yelp_in_tract1) + tm_polygons("proportion_bikers") + tm_shape(atlanta) + tm_borders(col="blue") + tm_shape(yelp_in) + tm_dots()

# Ideally, a bike rental will have more deman in lower income tracts that have low car ownership. Therefore this graph visualises the distribution of existing bike rental stores in relation to the meam household income

ggplot(tract_count_join, aes(x=hhincome, y=count)) +
  geom_point() +
  ylab("Number of Bike rentals and Household Median Income in Tract")

## Warning: Removed 3 rows containing missing values (`geom_point()`).

# Lastly this graph depicts the distance to the nearest bike rental store for each census tract

dist <- sf::st_distance(yelp_in_tract1, yelp_in)
hist(dist)

Findings

Proportion of bikers and location of existing bike rental stores - the map shows that all the existing bike rental stores were in close proximity to census tracts having a high proportion of bikers [6-8%].
HH income and distribution of existing bike rental stores - the graph shows that majority of the stores were in tracts with mean HH income [between USD 50,000 to 100,000] and emphasises the lack of stores in lower and very high income neighborhoods.
Distance to the nearest bike rental store for each census tract - indicates that majority of the census tracts are withing a distance of 2 - 6 miles form any of the existing bike rental stores.