1. City of Interest: Marietta, GA

The City of Marietta is located northwest of the city of Atlanta and is the county seat of Cobb County. According to the 2020 US Census, Marietta had a population of 60,792 in 2020, making it the 14th largest city in the State of Georgia. The city of Marietta intersects with 34 Census Tracts. To identify the Census Tracts that intersect with the geographic extent of Marietta and visualize them, we can follow these steps using R.

1) Load Required Packages

# Load Required Packages
library(tidycensus)
library(tidyverse)
library(tmap)
library(sf)

2) Load Census API

# Register the Census API key with tidycensus. The key is read from the
# `census_api` environment variable rather than being hard-coded in the script.
tidycensus::census_api_key(Sys.getenv("census_api"))

3) Load Geometries: Census Tracts within Cobb County (ACS), Marietta (Tigris)

# Census tracts of Cobb County (Marietta is its county seat), with geometry.
# get_acs() needs at least one variable, so the household income variable
# B19019_001 is requested from the 2021 ACS 5-year estimates.
tract <- suppressMessages(
  get_acs(
    geography = "tract",
    state = "GA",
    county = "Cobb",
    variables = c(hhincome = "B19019_001"),
    year = 2021,
    survey = "acs5",
    geometry = TRUE, # return an sf object
    output = "wide"
  )
)

# City boundary of Marietta, picked out of the Georgia places layer
marietta <- filter(tigris::places("GA"), NAME == "Marietta")

4) Identify Census Tracts that intersect with the geographic extent of Marietta

By running the code below, we can see that 34 Census Tracts intersect with the geographic extent of Marietta.

# Spatial subset: keep the tracts that intersect the Marietta boundary
# (st_intersects is the default predicate of `[` for sf objects; it is
# spelled out here for clarity)
tract_marietta <- tract[marietta, , op = st_intersects]

# Report how many tracts (rows) and columns were kept
message(sprintf("nrow: %s, ncol: %s",
                dim(tract_marietta)[1],
                dim(tract_marietta)[2]))
## nrow: 34, ncol: 5

5) Visualize Census Tracts and Marietta

# Keep only the tract ID and the income estimate, renaming hhincomeE to hhincome
tract_marietta <- select(tract_marietta, GEOID, hhincome = hhincomeE)

# Interactive map: tract outlines with the Marietta city boundary in red
tmap_mode("view")
tm_shape(tract_marietta) +
  tm_borders(lwd = 2) +
  tm_shape(marietta) +
  tm_polygons(col = "red", alpha = 0.4)

2. Business of Interest: Skincare, Day Spa

According to Data USA, Marietta’s young and affluent consumer base is reflected in the City’s median age of 35.2 and median household income of $67,589 in 2022. Accordingly, I wish to explore the landscape of two businesses, skincare and day spa, which are highly preferred by young, high-income consumers. We can follow these steps using R to understand the spatial and business-analytics dynamics of skincare and day spa businesses in Marietta.

1) Create a custom function to identify Census-tract-wise Radius

To define a search area for yelpr for each of the 34 census tracts, we use a custom function that describes a circular buffer around each census tract: it takes the center of the tract's bounding box (bb) and uses the distance from that center to a corner of the bounding box, enlarged by 10%, as the radius.

# Describe a circular search area that encloses one polygon.
#
# Arguments:
#   poly    - a single polygon geometry
#   epsg_id - EPSG code of the CRS the polygon's coordinates are expressed in
#
# Returns a one-row sf object whose geometry is the center of the polygon's
# bounding box and whose `radius` column is 1.1 times the distance from that
# center to the (xmin, ymin) corner of the bounding box.
get_r <- function(poly, epsg_id) {
  bb <- st_bbox(poly) # xmin, ymin, xmax, ymax

  # (xmin, ymin) corner of the bounding box as a one-point geometry column
  corner_pt <- st_sfc(st_point(c(bb[1], bb[2])), crs = epsg_id)

  # Midpoint of the bounding box, wrapped into an sf object
  center_xy <- c((bb[3] + bb[1]) / 2, (bb[4] + bb[2]) / 2)
  bb_center <- st_sfc(st_point(center_xy), crs = epsg_id) %>% st_sf()

  # Inflate the corner-to-center distance by 10% so the circle is larger than
  # the polygon it is meant to cover
  bb_center$radius <- st_distance(corner_pt, bb_center) * 1.1
  bb_center
}

2) Run the custom function to identify circular buffers around each census tract

# Build one search center (with radius) per census tract
epsg_id <- 4326 # geographic coordinate system used for the search centers
r4all_apply <- tract_marietta %>%
  st_geometry() %>%
  st_transform(crs = epsg_id) %>%
  lapply(get_r, epsg_id = epsg_id) %>% # one single-row sf object per tract
  bind_rows() # stack the per-tract results into a single sf object

# Expose the center coordinates as plain x / y columns
ready_4_yelp <- mutate(
  r4all_apply,
  x = st_coordinates(r4all_apply)[, "X"],
  y = st_coordinates(r4all_apply)[, "Y"]
)

3) Visualize the buffers

# Interactive map of the search circles (red) over the tract outlines (blue)
tmap_mode("view")
ready_4_yelp %>%
  st_buffer(dist = .$radius) %>% # circle radius = radius computed per tract
  tm_shape(.) +
  tm_polygons(col = "red", alpha = 0.5) +
  tm_shape(tract_marietta) +
  tm_borders(col = "blue")

Now, using these buffers, we can follow these steps with the yelpr package in R to retrieve all businesses within the defined search areas from the Yelp database.

4) Load yelpr Package

# Load required package
library(yelpr)

5) Create a custom function to search businesses within the buffer

# Query the Yelp business search for one search circle and one category.
#
# Arguments:
#   tract    - one row of the search table; its `x` (longitude), `y` (latitude)
#              and `radius` columns define the search circle
#   category - category alias passed to the `categories` parameter
#
# Returns a data frame of businesses. When the search reports fewer than 500
# matches, every page is fetched (50 businesses per request); otherwise a
# warning is raised and only the first page is returned.
#
# The API key is read from the `yelp_api_2` environment variable.
get_yelp <- function(tract, category) {
  page_size <- 50

  # Single place that builds a request, so the first page and the follow-up
  # pages cannot drift apart
  fetch_page <- function(page) {
    business_search(api_key = Sys.getenv("yelp_api_2"),
                    categories = category,
                    latitude = tract$y,
                    longitude = tract$x,
                    offset = (page - 1) * page_size, # = 0 for the first page
                    radius = round(tract$radius),
                    limit = page_size)
  }

  Sys.sleep(1) # pause before each search; presumably to respect rate limits

  # 1st request --------------------------------------------------------------
  resp <- fetch_page(1)
  if (is.null(resp$total)) {
    # Without `total` the page count cannot be computed; fail with a clear
    # message instead of an obscure vector() error further down
    stop("Yelp search returned no `total` field; ",
         "check the API key and the request parameters.",
         call. = FALSE)
  }

  required_n <- ceiling(resp$total / page_size) # total number of pages
  pages <- vector("list", max(required_n, 1))
  pages[[1]] <- resp$businesses

  if (resp$total >= 500) {
    # Too many matches to page through: keep the first page only, but return
    # it in the same shape (a data frame) as the normal case
    warning(sprintf(
      "Search at (%s, %s) reports %s businesses (>= 500); returning the first page only.",
      tract$x, tract$y, resp$total
    ), call. = FALSE)
    return(bind_rows(pages))
  }

  # Remaining pages -----------------------------------------------------------
  for (page in seq_len(required_n)[-1]) {
    pages[[page]] <- fetch_page(page)$businesses
  }

  bind_rows(pages) # merge all pages into a single data frame
}

6) Run the custom function: Skincare

# Query Yelp for skincare businesses around every census tract.
# One list slot per search circle is allocated up front.
yelp_skincare_all_list <- vector("list", nrow(ready_4_yelp))

# seq_len() keeps the loop from running when the search table has no rows
# (1:nrow() would iterate over 1 and 0 in that case)
for (row in seq_len(nrow(ready_4_yelp))) {
  yelp_skincare_all_list[[row]] <- suppressMessages(
    get_yelp(ready_4_yelp[row, ], "skincare")
  )
  print(paste0("Current row: ", row)) # progress indicator
}

# Merge the per-tract results into a single tibble and print it
yelp_skincare_all <- yelp_skincare_all_list %>%
  bind_rows() %>%
  as_tibble()
yelp_skincare_all %>% print(width = 1000)

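From this task, we can see that the search returned 538 skincare business records for the Marietta search area. Because the search circles of neighboring tracts overlap, the same business can be returned more than once, so this count may include duplicates.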

# Report the number of rows and columns of the merged skincare results
message(sprintf("nrow: %s, ncol: %s", nrow(yelp_skincare_all), ncol(yelp_skincare_all)))
## nrow: 538, ncol: 18

7) Run the custom function: Day Spa

# Query Yelp for day spa businesses ("spas" category) around every census
# tract. One list slot per search circle is allocated up front.
yelp_spas_all_list <- vector("list", nrow(ready_4_yelp))

# seq_len() keeps the loop from running when the search table has no rows
# (1:nrow() would iterate over 1 and 0 in that case)
for (row in seq_len(nrow(ready_4_yelp))) {
  yelp_spas_all_list[[row]] <- suppressMessages(
    get_yelp(ready_4_yelp[row, ], "spas")
  )
  print(paste0("Current row: ", row)) # progress indicator
}

# Merge the per-tract results into a single tibble and print it
yelp_spas_all <- yelp_spas_all_list %>%
  bind_rows() %>%
  as_tibble()
yelp_spas_all %>% print(width = 1000)

From this task, we can see that the search returned 180 day spa business records for the Marietta search area.

# Report the number of rows and columns of the merged day spa results
message(sprintf("nrow: %s, ncol: %s", nrow(yelp_spas_all), ncol(yelp_spas_all)))
## nrow: 180, ncol: 18

8) Summarize the result

Also, by combining the two data sets, we can see that the searches returned 718 day spa and skincare business records in total for the Marietta search area.

# Stack the skincare and day spa results into one table and report its size
yelp_two_biz_all <- list(yelp_skincare_all, yelp_spas_all) %>% bind_rows()
message(sprintf("nrow: %s, ncol: %s",
                dim(yelp_two_biz_all)[1],
                dim(yelp_two_biz_all)[2]))
## nrow: 718, ncol: 18

9) Visualization of Business Locations: Preparation

To visualize locations of Day Spa and Skincare businesses located in Marietta, we need to convert the data frames to spatial data frames.

# Turn the Yelp result tables into point layers (EPSG:4326).
# Longitude / latitude are lifted out of the nested `coordinates` column, and
# businesses without a complete coordinate pair are dropped first.
yelp_skincare_sf <- yelp_skincare_all %>%
  mutate(
    x = coordinates$longitude,
    y = coordinates$latitude
  ) %>%
  filter(!is.na(x), !is.na(y)) %>%
  st_as_sf(coords = c("x", "y"), crs = 4326)

yelp_spas_sf <- yelp_spas_all %>%
  mutate(
    x = coordinates$longitude,
    y = coordinates$latitude
  ) %>%
  filter(!is.na(x), !is.na(y)) %>%
  st_as_sf(coords = c("x", "y"), crs = 4326)

10) Visualization of Business Locations: Skincare

The visualization indicates that most skincare shops are located in downtown Marietta as well as in commercial districts along I-75. In particular, shops with higher review counts are primarily situated in downtown and northern Marietta.

# Dot map of the skincare businesses, colored by the `review_count` column
# (classes chosen with the "pretty" style)
tm_shape(yelp_skincare_sf) +
  tm_dots(col = "review_count", style="pretty")

11) Visualization of Business Locations: Day Spa

A similar pattern is shown for day spa businesses. Many day spas are found in downtown Marietta and in commercial districts along I-75, and day spas with higher review counts are located in downtown as well as western Marietta. Given the similarities in their locations, we might also say that many skincare and day spa shops cluster together in strategic business districts.

# Dot map of the day spa businesses, colored by the `review_count` column
# (classes chosen with the "pretty" style)
tm_shape(yelp_spas_sf) +
  tm_dots(col = "review_count", style="pretty")

12) Optional: Verifying geolocation data

If you would like to check whether the locations of skincare and day spa businesses pulled from the Yelp Fusion database are within the Marietta buffers, you may proceed with the following step in R.

# Rebuild the circular search areas: buffer every search center by its own
# `radius` value
buffer_ready_4_yelp <- st_buffer(ready_4_yelp, dist = ready_4_yelp$radius)

12-1) Verifying geolocation data: Skincare

# Split the skincare points by whether they fall within any search buffer.
# The st_within() predicate is evaluated once (in its sparse form) and reused
# for both subsets.
skincare_in_buffer <- lengths(
  st_within(yelp_skincare_sf, buffer_ready_4_yelp)
) > 0

inside_points_skincare <- yelp_skincare_sf %>%
  filter(skincare_in_buffer)

outside_points_skincare <- yelp_skincare_sf %>%
  filter(!skincare_in_buffer)

# Report, and map if needed, the points lying outside every buffer
if (nrow(outside_points_skincare) > 0) {
  cat(nrow(outside_points_skincare), "locations are outside the buffer.\n")

  # Keep only the points that lie within a buffer
  filtered_yelp_skincare_sf <- inside_points_skincare

  # Map the retained points (colored by rating) over the buffer outlines.
  # This stays the last expression of the branch so that it is auto-printed.
  tm_shape(filtered_yelp_skincare_sf) +
    tm_dots(col = "rating", style = "pretty") +
    tm_shape(buffer_ready_4_yelp) +
    tm_polygons(alpha = 0.05, col = NA, border.col = "grey")
} else {
  # The check is made against the search buffers, not the city boundary
  cat("All skincare businesses are located within the buffers.\n")
}
## 94 locations are outside the buffer.

12-2) Verifying geolocation data: Day Spa

# Split the day spa points by whether they fall within any search buffer.
# The st_within() predicate is evaluated once (in its sparse form) and reused
# for both subsets.
spas_in_buffer <- lengths(
  st_within(yelp_spas_sf, buffer_ready_4_yelp)
) > 0

inside_points_spas <- yelp_spas_sf %>%
  filter(spas_in_buffer)

outside_points_spas <- yelp_spas_sf %>%
  filter(!spas_in_buffer)

# Report, and map if needed, the points lying outside every buffer
if (nrow(outside_points_spas) > 0) {
  cat(nrow(outside_points_spas), "locations are outside the buffer.\n")

  # Keep only the points that lie within a buffer
  filtered_yelp_spas_sf <- inside_points_spas

  # Map the retained points (colored by rating) over the buffer outlines.
  # This stays the last expression of the branch so that it is auto-printed.
  tm_shape(filtered_yelp_spas_sf) +
    tm_dots(col = "rating", style = "pretty") +
    tm_shape(buffer_ready_4_yelp) +
    tm_polygons(alpha = 0.05, col = NA, border.col = "grey")
} else {
  # The check is made against the search buffers, not the city boundary
  cat("All day spa businesses are located within the buffers.\n")
}
## 27 locations are outside the buffer.

Reference

US Census (n.d.) American Community Survey 2022: 5-year estimates.

Data USA (n.d.) Data USA: Marietta, GA. https://datausa.io/profile/geo/marietta-ga/