Step 1 Download the census data

tidycensus::census_api_key(Sys.getenv("census_api"))
## To install your API key for use in future sessions, run this function with `install = TRUE`.
# Download the Census ACS 5-year estimate data for Census Tracts
tract <- suppressMessages(
  get_acs(geography = "tract", # or "block group", "county", "state" etc.
          state = "GA",
          county = c("Fulton", "Dekalb"),
          variables = c(hhincome = 'B19019_001',
                        race.tot = "B02001_001",
                        race.white = "B02001_002",
                        race.black = "B02001_003",
                        trans.total = "B08006_001",
                        trans.car = "B08006_002",
                        trans.drovealone = "B08006_003",
                        trans.carpooled = "B08006_004",
                        trans.pubtrans = "B08006_008",
                        trans.bicycle = "B08006_014",
                        trans.walk = "B08006_015",
                        trans.WfH = "B08006_017",
                        med_housexp = "B25104_001",
                        med_realestate_taxes = "B25103_001"
          ),
          year = 2019,
          survey = "acs5", # American Community Survey 5-year estimate
          geometry = TRUE, # returns sf objects
          output = "wide") # wide/long
)
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |=                                                                     |   1%
  |                                                                            
  |=                                                                     |   2%
  |                                                                            
  |==                                                                    |   3%
  |                                                                            
  |===                                                                   |   4%
  |                                                                            
  |====                                                                  |   6%
  |                                                                            
  |=====                                                                 |   7%
  |                                                                            
  |======                                                                |   9%
  |                                                                            
  |=======                                                               |  11%
  |                                                                            
  |=========                                                             |  12%
  |                                                                            
  |=========                                                             |  13%
  |                                                                            
  |==========                                                            |  14%
  |                                                                            
  |============                                                          |  16%
  |                                                                            
  |============                                                          |  17%
  |                                                                            
  |============                                                          |  18%
  |                                                                            
  |=============                                                         |  19%
  |                                                                            
  |==============                                                        |  20%
  |                                                                            
  |===============                                                       |  21%
  |                                                                            
  |================                                                      |  22%
  |                                                                            
  |=================                                                     |  24%
  |                                                                            
  |=================                                                     |  25%
  |                                                                            
  |==================                                                    |  26%
  |                                                                            
  |===================                                                   |  27%
  |                                                                            
  |====================                                                  |  29%
  |                                                                            
  |=====================                                                 |  29%
  |                                                                            
  |======================                                                |  31%
  |                                                                            
  |=======================                                               |  32%
  |                                                                            
  |=======================                                               |  33%
  |                                                                            
  |========================                                              |  34%
  |                                                                            
  |=========================                                             |  36%
  |                                                                            
  |==========================                                            |  37%
  |                                                                            
  |===========================                                           |  38%
  |                                                                            
  |============================                                          |  39%
  |                                                                            
  |=============================                                         |  42%
  |                                                                            
  |==============================                                        |  43%
  |                                                                            
  |===============================                                       |  44%
  |                                                                            
  |================================                                      |  45%
  |                                                                            
  |=================================                                     |  47%
  |                                                                            
  |==================================                                    |  49%
  |                                                                            
  |===================================                                   |  50%
  |                                                                            
  |====================================                                  |  51%
  |                                                                            
  |=====================================                                 |  52%
  |                                                                            
  |======================================                                |  54%
  |                                                                            
  |======================================                                |  55%
  |                                                                            
  |=======================================                               |  56%
  |                                                                            
  |========================================                              |  57%
  |                                                                            
  |==========================================                            |  60%
  |                                                                            
  |===========================================                           |  61%
  |                                                                            
  |============================================                          |  63%
  |                                                                            
  |=============================================                         |  64%
  |                                                                            
  |==============================================                        |  66%
  |                                                                            
  |===============================================                       |  67%
  |                                                                            
  |================================================                      |  68%
  |                                                                            
  |=================================================                     |  70%
  |                                                                            
  |==================================================                    |  72%
  |                                                                            
  |===================================================                   |  73%
  |                                                                            
  |====================================================                  |  74%
  |                                                                            
  |=====================================================                 |  75%
  |                                                                            
  |======================================================                |  77%
  |                                                                            
  |=======================================================               |  78%
  |                                                                            
  |=======================================================               |  79%
  |                                                                            
  |========================================================              |  81%
  |                                                                            
  |=========================================================             |  81%
  |                                                                            
  |==========================================================            |  82%
  |                                                                            
  |==========================================================            |  83%
  |                                                                            
  |===========================================================           |  85%
  |                                                                            
  |============================================================          |  86%
  |                                                                            
  |==============================================================        |  88%
  |                                                                            
  |===============================================================       |  90%
  |                                                                            
  |================================================================      |  92%
  |                                                                            
  |=================================================================     |  93%
  |                                                                            
  |===================================================================   |  95%
  |                                                                            
  |====================================================================  |  97%
  |                                                                            
  |===================================================================== |  99%
  |                                                                            
  |======================================================================| 100%
atlanta <- places('GA') %>% 
  filter(NAME == 'Atlanta') 
## Retrieving data for the year 2021
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |=                                                                     |   1%
  |                                                                            
  |=                                                                     |   2%
  |                                                                            
  |==                                                                    |   2%
  |                                                                            
  |==                                                                    |   3%
  |                                                                            
  |==                                                                    |   4%
  |                                                                            
  |===                                                                   |   4%
  |                                                                            
  |===                                                                   |   5%
  |                                                                            
  |====                                                                  |   5%
  |                                                                            
  |====                                                                  |   6%
  |                                                                            
  |=====                                                                 |   7%
  |                                                                            
  |=====                                                                 |   8%
  |                                                                            
  |======                                                                |   8%
  |                                                                            
  |======                                                                |   9%
  |                                                                            
  |=======                                                               |  10%
  |                                                                            
  |========                                                              |  11%
  |                                                                            
  |========                                                              |  12%
  |                                                                            
  |=========                                                             |  12%
  |                                                                            
  |=========                                                             |  13%
  |                                                                            
  |==========                                                            |  14%
  |                                                                            
  |==========                                                            |  15%
  |                                                                            
  |===========                                                           |  16%
  |                                                                            
  |============                                                          |  17%
  |                                                                            
  |============                                                          |  18%
  |                                                                            
  |=============                                                         |  19%
  |                                                                            
  |==============                                                        |  20%
  |                                                                            
  |===============                                                       |  21%
  |                                                                            
  |===============                                                       |  22%
  |                                                                            
  |================                                                      |  22%
  |                                                                            
  |================                                                      |  23%
  |                                                                            
  |=================                                                     |  24%
  |                                                                            
  |=================                                                     |  25%
  |                                                                            
  |==================                                                    |  25%
  |                                                                            
  |==================                                                    |  26%
  |                                                                            
  |===================                                                   |  27%
  |                                                                            
  |====================                                                  |  28%
  |                                                                            
  |====================                                                  |  29%
  |                                                                            
  |=====================                                                 |  29%
  |                                                                            
  |=====================                                                 |  30%
  |                                                                            
  |======================                                                |  31%
  |                                                                            
  |======================                                                |  32%
  |                                                                            
  |=======================                                               |  32%
  |                                                                            
  |=======================                                               |  33%
  |                                                                            
  |=======================                                               |  34%
  |                                                                            
  |========================                                              |  34%
  |                                                                            
  |========================                                              |  35%
  |                                                                            
  |=========================                                             |  35%
  |                                                                            
  |=========================                                             |  36%
  |                                                                            
  |==========================                                            |  37%
  |                                                                            
  |==========================                                            |  38%
  |                                                                            
  |===========================                                           |  38%
  |                                                                            
  |===========================                                           |  39%
  |                                                                            
  |============================                                          |  39%
  |                                                                            
  |============================                                          |  40%
  |                                                                            
  |=============================                                         |  41%
  |                                                                            
  |=============================                                         |  42%
  |                                                                            
  |==============================                                        |  42%
  |                                                                            
  |==============================                                        |  43%
  |                                                                            
  |===============================                                       |  44%
  |                                                                            
  |===============================                                       |  45%
  |                                                                            
  |================================                                      |  46%
  |                                                                            
  |=================================                                     |  47%
  |                                                                            
  |=================================                                     |  48%
  |                                                                            
  |==================================                                    |  49%
  |                                                                            
  |===================================                                   |  50%
  |                                                                            
  |====================================                                  |  51%
  |                                                                            
  |====================================                                  |  52%
  |                                                                            
  |=====================================                                 |  53%
  |                                                                            
  |======================================                                |  54%
  |                                                                            
  |=======================================                               |  55%
  |                                                                            
  |=======================================                               |  56%
  |                                                                            
  |========================================                              |  57%
  |                                                                            
  |========================================                              |  58%
  |                                                                            
  |=========================================                             |  58%
  |                                                                            
  |=========================================                             |  59%
  |                                                                            
  |==========================================                            |  59%
  |                                                                            
  |==========================================                            |  60%
  |                                                                            
  |===========================================                           |  61%
  |                                                                            
  |============================================                          |  62%
  |                                                                            
  |============================================                          |  63%
  |                                                                            
  |============================================                          |  64%
  |                                                                            
  |=============================================                         |  64%
  |                                                                            
  |=============================================                         |  65%
  |                                                                            
  |==============================================                        |  65%
  |                                                                            
  |==============================================                        |  66%
  |                                                                            
  |===============================================                       |  67%
  |                                                                            
  |===============================================                       |  68%
  |                                                                            
  |================================================                      |  68%
  |                                                                            
  |================================================                      |  69%
  |                                                                            
  |=================================================                     |  69%
  |                                                                            
  |=================================================                     |  70%
  |                                                                            
  |==================================================                    |  71%
  |                                                                            
  |==================================================                    |  72%
  |                                                                            
  |===================================================                   |  72%
  |                                                                            
  |===================================================                   |  73%
  |                                                                            
  |===================================================                   |  74%
  |                                                                            
  |====================================================                  |  74%
  |                                                                            
  |====================================================                  |  75%
  |                                                                            
  |=====================================================                 |  76%
  |                                                                            
  |======================================================                |  77%
  |                                                                            
  |======================================================                |  78%
  |                                                                            
  |=======================================================               |  78%
  |                                                                            
  |=======================================================               |  79%
  |                                                                            
  |========================================================              |  80%
  |                                                                            
  |========================================================              |  81%
  |                                                                            
  |=========================================================             |  81%
  |                                                                            
  |=========================================================             |  82%
  |                                                                            
  |==========================================================            |  83%
  |                                                                            
  |===========================================================           |  84%
  |                                                                            
  |============================================================          |  85%
  |                                                                            
  |============================================================          |  86%
  |                                                                            
  |=============================================================         |  87%
  |                                                                            
  |=============================================================         |  88%
  |                                                                            
  |==============================================================        |  88%
  |                                                                            
  |==============================================================        |  89%
  |                                                                            
  |===============================================================       |  89%
  |                                                                            
  |===============================================================       |  90%
  |                                                                            
  |===============================================================       |  91%
  |                                                                            
  |================================================================      |  91%
  |                                                                            
  |================================================================      |  92%
  |                                                                            
  |=================================================================     |  92%
  |                                                                            
  |=================================================================     |  93%
  |                                                                            
  |==================================================================    |  94%
  |                                                                            
  |==================================================================    |  95%
  |                                                                            
  |===================================================================   |  95%
  |                                                                            
  |===================================================================   |  96%
  |                                                                            
  |====================================================================  |  97%
  |                                                                            
  |====================================================================  |  98%
  |                                                                            
  |===================================================================== |  98%
  |                                                                            
  |===================================================================== |  99%
  |                                                                            
  |======================================================================|  99%
  |                                                                            
  |======================================================================| 100%
tract <- tract[atlanta,]

# View the data
message(sprintf("nrow: %s, ncol: %s", nrow(tract), ncol(tract)))
## nrow: 161, ncol: 31
# Retaining only those I want.
# Notice that select function can also change names when it selects columns.
tract <- tract %>% 
  select(GEOID, 
         hhincome = hhincomeE, # New name = old name
         race.tot = race.totE,
         race.white = race.whiteE,
         race.black = race.blackE,
         trans.total = trans.totalE,
         trans.car = trans.carE,
         trans.drovealone = trans.drovealoneE,
         trans.carpooled = trans.carpooledE,
         trans.pubtrans = trans.pubtransE,
         trans.bicycle = trans.bicycleE,
         trans.walk = trans.walkE,
         trans.WfH = trans.WfHE,
         med_housexp = med_housexpE,
         med_realestate_taxes = med_realestate_taxesE)

tmap_mode("view")
## tmap mode set to interactive viewing
tm_shape(tract) + tm_borders() + tm_shape(atlanta) + tm_borders(col = 'red')

Step 2 Prepare census data for yelp

# Function: Get tract-wise radius
get_r <- function(poly, epsg_id){
  #---------------------
  # Takes: a single POLYGON or LINESTRTING
  # Outputs: distance between the centroid of the boundingbox and a corner of the bounding box
  #---------------------
  
  # Get bounding box of a given polygon
  bb <- st_bbox(poly)
  # Get lat & long coordinates of any one corner of the bounding box.
  bb_corner <- st_point(c(bb[1], bb[2])) %>% st_sfc(crs = epsg_id)
  # Get centroid of the bb
  bb_center_x <- (bb[3]+bb[1])/2
  bb_center_y <- (bb[4]+bb[2])/2
  bb_center <- st_point(c(bb_center_x, bb_center_y)) %>% st_sfc(crs = epsg_id) %>% st_sf()
  
  # Get the distance between bb_p and c
  r <- st_distance(bb_corner, bb_center)
  # Multiply 1.1 to make the circle a bit larger than the Census Tract.
  # See the Yelp explanation of their radius parameter to see why we do this.
  bb_center$radius <- r*1.2
  return(bb_center)
}

## Using a loop -----------------------------------------------------------------
# Creating an empty vector of NA. 
# Results will fill this vector
epsg_id <- 4326

r4all_loop <- vector("list", nrow(tract))

# Starting a for-loop

for (i in 1:nrow(tract)){
  r4all_loop[[i]] <- tract %>% 
    st_transform(crs = epsg_id) %>% 
    st_geometry() %>% 
    .[[i]] %>% 
    get_r(epsg_id = epsg_id)
}

r4all_loop <- bind_rows(r4all_loop)


# Using a functional -----------------------------------------------------------
# We use a functional (sapply) to apply this custom function to each Census Tract.
r4all_apply <- tract %>%
  st_geometry() %>% 
  st_transform(crs = epsg_id) %>% 
  lapply(., function(x) get_r(x, epsg_id = epsg_id))

r4all_apply <- bind_rows(r4all_apply)

# Are these two identical?
identical(r4all_apply, r4all_loop)
## [1] TRUE
# Appending X Y coordinates as seprate columns
ready_4_yelp <- r4all_apply %>% 
  mutate(x = st_coordinates(.)[,1],
         y = st_coordinates(.)[,2])

tmap_mode('view')
## tmap mode set to interactive viewing
# Select the first 10 rows
ready_4_yelp[1:10,] %>% 
  # Draw a buffer centered at the centroid of Tract polygons.
  # Radius of the buffer is the radius we just calculated using loop
  st_buffer(., dist = .$radius) %>% 
  # Display this buffer in red
  tm_shape(.) + tm_polygons(alpha = 0.5, col = 'red') +
  # Display the original polygon in blue
  tm_shape(tract[1:10,]) + tm_borders(col= 'blue')

Step3 Download yelp data

# FUNCTION
get_yelp <- function(tract, category){
  # Gets one row of tract information (1,) and category name (str),
  # Outputs a list of business data.frame
  Sys.sleep(1)
  n <- 1
  # First request --------------------------------------------------------------
  resp <- business_search(api_key = Sys.getenv("yelp_api"), 
                          categories = category, 
                          latitude = tract$y, 
                          longitude = tract$x, 
                          offset = (n - 1) * 50, # = 0 when n = 1
                          radius = round(tract$radius), 
                          limit = 50)
  # Calculate how many requests are needed in total
  required_n <- ceiling(resp$total/50)
  
  # out is where the results will be appended to.
  out <- vector("list", required_n)
  
  # Store the business information to nth slot in out
  out[[n]] <- resp$businesses
  
  # Change the name of the elements to the total required_n
  # This is to know if there are more than 1000 businesses,
  # we know how many.
  names(out)[n] <- required_n
  
  # Throw error if more than 500
  if (resp$total >= 500)
  {
    # glue formats string by inserting {n} with what's currently stored in object n.
    print(glue::glue("{n}th row has >= 1000 businesses."))
    # Stop before going into the loop because we need to
    # break down Census Tract to something smaller.
    return(out)
  } 
  else 
  {
    # add 1 to n
    n <- n + 1
    
    # Now we know required_n -----------------------------------------------------
    # Starting a loop
    while(n <= required_n){
      resp <- business_search(api_key = Sys.getenv("yelp_api"), 
                              categories = category, 
                              latitude = tract$y, 
                              longitude = tract$x, 
                              offset = (n - 1) * 50, 
                              radius = round(tract$radius), 
                              limit = 50)
      
      out[[n]] <- resp$businesses
      
      n <- n + 1
    } #<< end of while loop
    
    # Merge all elements in the list into a single data frame
    out <- out %>% bind_rows()
    
    return(out)
  }
}

# Apply the function for the first Census Tract
#yelp_first_tract <- get_yelp(ready_4_yelp[1,], "bikerentals") %>% 
#  as_tibble()

# Print
#yelp_first_tract %>% print

# Prepare a collector
yelp_all_list <- vector("list", nrow(ready_4_yelp))

# Looping through all Census Tracts
for (row in 1:nrow(ready_4_yelp)){
  yelp_all_list[[row]] <- suppressMessages(get_yelp(ready_4_yelp[row,], "bikerentals"))
  if (row %% 10 == 0){
    print(paste0("Current row: ", row))
  }
}

# Collapsing the list into a data.frame
yelp_all <- yelp_all_list %>% bind_rows() %>% as_tibble()

# print
yelp_all %>% print(width=1000)

# Extract coordinates
yelp_sf <- yelp_all %>% 
  mutate(x = .$coordinates$longitude,
         y = .$coordinates$latitude) %>% 
  filter(!is.na(x) & !is.na(y)) %>% 
  st_as_sf(coords = c("x", "y"), crs = 4326)

# Map
tm_shape(yelp_sf) +
  tm_dots(col = "review_count", style="quantile")
write_rds(yelp_all, file = 'C:/Users/49765/Desktop/Urban Analytics/mini3/yelp_all.rds')
st_write(atlanta,"atlanta.geojson",
         delete_layer=TRUE)

Step4 Data cleaning

# Read the full data
my_yelp <- read_rds('C:/Users/49765/Desktop/Urban Analytics/mini3/yelp_all.rds')
yelp_unique <- my_yelp %>% 
  distinct(id, .keep_all=T)

glue::glue("Before dropping duplicated rows, there were {nrow(my_yelp)} rows. After dropping them, there are {nrow(yelp_unique)} rows") %>% 
  print()
## Before dropping duplicated rows, there were 91 rows. After dropping them, there are 10 rows
concate_list <- function(x){
  # x is a data frame with columns "alias" and "title" from Yelp$categories
  # returns a character vector containing category concatenated titles 
  titles <- x[["title"]] %>% str_c(collapse = ", ")
  return(titles)
}

yelp_flat <- yelp_unique %>% 
  # 1. Flattening columns with data frame
  jsonlite::flatten() %>% 
  # 2. Handling list-columns
  mutate(transactions = transactions %>% 
           map_chr(., function(x) str_c(x, collapse=", ")),
         location.display_address = location.display_address %>% 
           map_chr(., function(x) str_c(x, collapse=", ")),
         categories = categories %>% map_chr(concate_list)) # concate_list is the custom function
# Identify NA values
yelp_flat %>% 
  map_dbl(., function(x) sum(is.na(x))) 
##                       id                    alias                     name 
##                        0                        0                        0 
##                image_url                is_closed                      url 
##                        0                        0                        0 
##             review_count               categories                   rating 
##                        0                        0                        0 
##             transactions                    phone            display_phone 
##                        0                        0                        0 
##                 distance                    price     coordinates.latitude 
##                        0                        6                        0 
##    coordinates.longitude        location.address1        location.address2 
##                        0                        1                        3 
##        location.address3            location.city        location.zip_code 
##                        3                        0                        0 
##         location.country           location.state location.display_address 
##                        0                        0                        0
  # map_dbl is a variant of map() which outputs 
  # numeric vector rather than a list.
# Fist, let's verify that the  missing values in lat/long columns are in the same rows.
identical(is.na(yelp_flat$coordinates.latitude),
          is.na(yelp_flat$coordinates.longitude)) # Yes, they are in the same 4 rows.
## [1] TRUE
# Drop them.
yelp_dropna1 <- yelp_flat %>% 
  drop_na(coordinates.longitude)

We only have 4 shops left after drop the Na in column price, so we are not going to use yelp_dropna2.

# Dropping NAs in price
yelp_dropna2 <- yelp_dropna1 %>% 
  drop_na(price)
# census boundary
census <- st_read('C:/Users/49765/Desktop/Urban Analytics/mini3/atlanta.geojson')
## Reading layer `atlanta' from data source 
##   `C:\Users\49765\Desktop\Urban Analytics\mini3\atlanta.geojson' 
##   using driver `GeoJSON'
## Simple feature collection with 1 feature and 16 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -84.55085 ymin: 33.64781 xmax: -84.28956 ymax: 33.88682
## Geodetic CRS:  NAD83
# Converting yelp_dropna1 into a sf object
yelp_sf <- yelp_dropna1 %>% 
  st_as_sf(coords=c("coordinates.longitude", "coordinates.latitude"), crs = 4326)

atlanta <- st_transform(atlanta, crs = st_crs(yelp_sf))

# sf subsets
yelp_in <- yelp_sf[atlanta, ,op = st_intersects]
tract <- st_transform(tract, crs = st_crs(yelp_sf))

# census is currently sfc. Convert it to sf.
tract_sf <- tract %>% filter() %>% st_sf()

# Spatial join
yelp_census <- st_join(yelp_in, tract_sf, join = st_intersects)
census_yelp <- st_join(tract_sf, yelp_in, join = st_intersects)

Step5 Derive insights from data

1. An overview of where the bikerentals are.

# Join tract geometry with the number of bikerentals in tract
test <- st_join(tract_sf, yelp_in %>% mutate(count = 1))
out <- test %>%
  group_by(GEOID) %>%
  summarise(count = sum(count, na.rm = T))

# Lets' check to see if the polygons and the poin data on bikerentals match
tm_shape(out) + tm_polygons(col = "count") + tm_shape(yelp_in)  + tm_dots()

5. Overview

The overall location of current bikerentals in Atlanta is appropriate. However, two of the stores are sited in locations that have a high number of walkers and a low number of riders, and do not receive high ratings. This is not quite explainable. Therefore, we can analyze the cycling rate.

boxplot(trans.bicycle/trans.total~bikerentals, data=tract_sf_bike, main="Boxplot of bikerentals by trans.bicycle", xlab="Whether bikerentals are present", ylab="trans.bicycle/trans.total")

boxplot(trans.car/trans.total~bikerentals, data=tract_sf_bike, main="Boxplot of bikerentals by trans.bicycle", xlab="Whether bikerentals are present", ylab="trans.car/trans.total")