# tidycensus::census_api_key(Sys.getenv("census_api"))
library(tidycensus)
library(sf)
library(tmap)
library(jsonlite)
library(tidyverse)
library(httr)
library(reshape2)
library(here)
library(yelpr)
library(knitr)
library(skimr)
library(tigris)
tmap_mode("view")
tidycensus::census_api_key(Sys.getenv("census_api_key"))
yelp_api_key = Sys.getenv("yelp_api_key")
For this assignment, imagine that you are a bicycle enthusiast and want to set up a bicycle rental store in a place within the City of Atlanta. The question in your mind is about where this store should be located. You realize that you can get the locations of all bike rental stores from Yelp API (categories: bikerentals). Another data that might help you could be the census data showing how many commuters commute on a bike (which could be a proxy for bike-friendliness of a community and environment). Here you will be looking for places where there is gap between bike stores (rentals in this case - but hopefully these also have other bike related services) and tracts with bike commuters.
To complete this assignment, follow the directions below:
# Function of downloading census data
# Define the variables related to commuting
vars <- c("total" = "B08301_001", # Total
"percent_bike_to_work"="S0801_C01_011",
"bike"="B08301_014",
"drove_alone" = "B08301_003",
"carpooled" = "B08301_004",
"public_transportation" = "B08301_010",
"walked" = "B08301_018",
"other_means" = "B08301_019",
"worked_at_home" = "B08301_021")
census <- suppressMessages(
get_acs(geography = "tract", # or "block group", "county", "state" etc.
state = "GA",
county = c("Fulton", "Dekalb"),
variables = vars,
year = 2021,
survey = "acs5", # American Community Survey 5-year estimate
geometry = TRUE, # returns sf objects
output = "wide") # wide vs. long
)
# Download the Census ACS 5-year estimate data for Census Tracts in Fulton and DeKalb counties
# You may also need to include other variables.
# Save
save(census, file = 'census.RData')
# You may use older Census boundary (e.g., 2019) to reduce the number of Yelp API queries in Step
old_census <- suppressMessages(
get_acs(geography = "tract", # or "block group", "county", "state" etc.
state = "GA",
county = c("Fulton", "Dekalb"),
variables = vars,
year = 2019,
survey = "acs5", # American Community Survey 5-year estimate
geometry = TRUE, # returns sf objects
output = "wide") # wide vs. long
)
# Save
save(old_census, file = 'old_census.RData')
load('census.RData')
# Download the City of Atlanta boundary
atlanta <- places('GA') %>%
filter(NAME == 'Atlanta')
# Save
save(atlanta, file = 'atlanta.RData')
load('atlanta.RData')
# Filter the Census Tracts that either fall within or intersect with the City of Atlanta.
census_in_atlanta <- st_intersection(census, atlanta)
basemap <- tm_shape(census) + tm_borders() + tm_shape(census_in_atlanta) + tm_borders(col = 'red')
basemap
# Save
save(census_in_atlanta, file = 'census_in_atlanta.RData')
old_census_in_atlanta <- st_intersection(old_census, atlanta)
## Error in eval(expr, envir, enclos): object 'old_census' not found
old_basemap <- tm_shape(census) + tm_borders() + tm_shape(census_in_atlanta) + tm_borders(col = 'red')
## Error in eval(expr, envir, enclos): object 'census' not found
tmap_arrange(basemap, old_basemap)
## Error in eval(expr, envir, enclos): object 'basemap' not found
# Download Yelp data on categories = bikerentals for the City of Atlanta.
# FUNCTION
get_yelp <- function(tract, category){
# ----------------------------------
# Gets one row of tract information (1,) and category name (str),
# Outputs a list of business data.frame
Sys.sleep(1)
n <- 1
# First request --------------------------------------------------------------
resp <- business_search(api_key = Sys.getenv("yelp_api_key"),
categories = category,
latitude = tract$y,
longitude = tract$x,
offset = (n - 1) * 50, # = 0 when n = 1
radius = round(tract$radius),
limit = 50)
# Calculate how many requests are needed in total
required_n <- ceiling(resp$total/50)
# out is where the results will be appended to.
out <- vector("list", required_n)
# Store the business information to nth slot in out
out[[n]] <- resp$businesses
# Change the name of the elements to the total required_n
# This is to know if there are more than 1000 businesses,
# we know how many.
names(out)[n] <- required_n
# Throw error if more than 1000
if (resp$total >= 1000)
{
# glue formats string by inserting {n} with what's currently stored in object n.
print(glue::glue("{n}th row has >= 1000 businesses."))
# Stop before going into the loop because we need to
# break down Census Tract to something smaller.
return(out)
}
else
{
# add 1 to n
n <- n + 1
# Now we know required_n -----------------------------------------------------
# Starting a loop
# pagination
# because we don't know how many times to loop
while(n <= required_n){
resp <- business_search(api_key = Sys.getenv("yelp_api_key"),
categories = category,
latitude = tract$y,
longitude = tract$x,
offset = (n - 1) * 50,
radius = round(tract$radius),
limit = 50)
out[[n]] <- resp$businesses
n <- n + 1
} #<< end of while loop
# Merge all elements in the list into a single data frame
out <- out %>% bind_rows()
return(out)
}
}
# Function: Get tract-wise radius
get_r <- function(poly, epsg_id){
#---------------------
# Takes: a single POLYGON or LINESTRTING
# Outputs: distance between the centroid of the boundingbox and a corner of the bounding box
#---------------------
# Get bounding box of a given polygon
bb <- st_bbox(poly)
# Get lat & long coordinates of any one corner of the bounding box.
bb_corner <- st_point(c(bb[1], bb[2])) %>% st_sfc(crs = epsg_id)
# Get centroid of the bb
bb_center_x <- (bb[3]+bb[1])/2
bb_center_y <- (bb[4]+bb[2])/2
bb_center <- st_point(c(bb_center_x, bb_center_y)) %>% st_sfc(crs = epsg_id) %>% st_sf()
# Get the distance between bb_p and c
r <- st_distance(bb_corner, bb_center)
# Multiply 1.1 to make the circle a bit larger than the Census Tract.
# See the Yelp explanation of their radius parameter to see why we do this.
bb_center$radius <- r*1.2
return(bb_center)
}
epsg_id <- 4326
r4all_loop <- vector("list", nrow(census_in_atlanta))
## Error in eval(expr, envir, enclos): object 'census_in_atlanta' not found
r4all_apply <- census_in_atlanta %>%
st_geometry() %>%
st_transform(crs = epsg_id) %>%
lapply(., function(x) get_r(x, epsg_id = epsg_id))
## Error in eval(expr, envir, enclos): object 'census_in_atlanta' not found
r4all_apply <- bind_rows(r4all_apply)
## Error in eval(expr, envir, enclos): object 'r4all_apply' not found
# Appending X Y coordinates as seprate columns
ready_4_yelp <- r4all_apply %>%
mutate(x = st_coordinates(.)[,1],
y = st_coordinates(.)[,2])
## Error in eval(expr, envir, enclos): object 'r4all_apply' not found
# Yelp download function
# Prepare a collector
yelp_list <- vector("list", nrow(ready_4_yelp))
## Error in eval(expr, envir, enclos): object 'ready_4_yelp' not found
# Looping through all Census Tracts
for (row in 1:nrow(ready_4_yelp)){
if (row > 150){
break
}
yelp_list[[row]] <- suppressMessages(get_yelp(ready_4_yelp[row,], "bikerentals"))
if (row %% 10 == 0){
print(paste0("Current row: ", row))
}
}
## Error in eval(expr, envir, enclos): object 'ready_4_yelp' not found
# Save
yelp_bikes <- yelp_list %>% bind_rows() %>% as_tibble()
## Error in eval(expr, envir, enclos): object 'yelp_list' not found
# print
yelp_bikes %>% print(width=1000)
## Error in eval(expr, envir, enclos): object 'yelp_bikes' not found
save(yelp_bikes, file = 'yelp_bikes.RData')
## Error in save(yelp_bikes, file = "yelp_bikes.RData"): object 'yelp_bikes' not found
load('yelp_bikes.RData')
yelp_bikes %>% print(width=1000)
## # A tibble: 45 × 16
## id alias
## <chr> <chr>
## 1 rbf8bVY0cuqyGZtbn691lg pedego-electric-bikes-atlanta-atlanta-2
## 2 tMNV5bj4rqud0cRRQiPbWA outback-bikes-atlanta
## 3 b3nacMG8PR77GNCaI4RBKA atlanta-bicycle-barn-atlanta
## 4 tMNV5bj4rqud0cRRQiPbWA outback-bikes-atlanta
## 5 rbf8bVY0cuqyGZtbn691lg pedego-electric-bikes-atlanta-atlanta-2
## 6 tMNV5bj4rqud0cRRQiPbWA outback-bikes-atlanta
## 7 tMNV5bj4rqud0cRRQiPbWA outback-bikes-atlanta
## 8 rbf8bVY0cuqyGZtbn691lg pedego-electric-bikes-atlanta-atlanta-2
## 9 rbf8bVY0cuqyGZtbn691lg pedego-electric-bikes-atlanta-atlanta-2
## 10 JkkHRgYj0mvdgbMXFm436w civil-bikes-atlanta
## name
## <chr>
## 1 Pedego Electric Bikes Atlanta
## 2 Outback Bikes
## 3 Atlanta Bicycle Barn
## 4 Outback Bikes
## 5 Pedego Electric Bikes Atlanta
## 6 Outback Bikes
## 7 Outback Bikes
## 8 Pedego Electric Bikes Atlanta
## 9 Pedego Electric Bikes Atlanta
## 10 Civil Bikes
## image_url
## <chr>
## 1 https://s3-media2.fl.yelpcdn.com/bphoto/Z7KlxC0vcyoxOCSxbH3Rrg/o.jpg
## 2 https://s3-media4.fl.yelpcdn.com/bphoto/rnpOTcs3WwjTq1JsfV1b8w/o.jpg
## 3 https://s3-media3.fl.yelpcdn.com/bphoto/Ik2pMce41_MRcg3svjTbSQ/o.jpg
## 4 https://s3-media4.fl.yelpcdn.com/bphoto/rnpOTcs3WwjTq1JsfV1b8w/o.jpg
## 5 https://s3-media2.fl.yelpcdn.com/bphoto/Z7KlxC0vcyoxOCSxbH3Rrg/o.jpg
## 6 https://s3-media4.fl.yelpcdn.com/bphoto/rnpOTcs3WwjTq1JsfV1b8w/o.jpg
## 7 https://s3-media4.fl.yelpcdn.com/bphoto/rnpOTcs3WwjTq1JsfV1b8w/o.jpg
## 8 https://s3-media2.fl.yelpcdn.com/bphoto/Z7KlxC0vcyoxOCSxbH3Rrg/o.jpg
## 9 https://s3-media2.fl.yelpcdn.com/bphoto/Z7KlxC0vcyoxOCSxbH3Rrg/o.jpg
## 10 https://s3-media4.fl.yelpcdn.com/bphoto/JqTLT-chrqtbyuoB-52gdw/o.jpg
## is_closed
## <lgl>
## 1 FALSE
## 2 FALSE
## 3 FALSE
## 4 FALSE
## 5 FALSE
## 6 FALSE
## 7 FALSE
## 8 FALSE
## 9 FALSE
## 10 FALSE
## url
## <chr>
## 1 https://www.yelp.com/biz/pedego-electric-bikes-atlanta-atlanta-2?adjust_crea…
## 2 https://www.yelp.com/biz/outback-bikes-atlanta?adjust_creative=aOMz3ElZ4MnHC…
## 3 https://www.yelp.com/biz/atlanta-bicycle-barn-atlanta?adjust_creative=aOMz3E…
## 4 https://www.yelp.com/biz/outback-bikes-atlanta?adjust_creative=aOMz3ElZ4MnHC…
## 5 https://www.yelp.com/biz/pedego-electric-bikes-atlanta-atlanta-2?adjust_crea…
## 6 https://www.yelp.com/biz/outback-bikes-atlanta?adjust_creative=aOMz3ElZ4MnHC…
## 7 https://www.yelp.com/biz/outback-bikes-atlanta?adjust_creative=aOMz3ElZ4MnHC…
## 8 https://www.yelp.com/biz/pedego-electric-bikes-atlanta-atlanta-2?adjust_crea…
## 9 https://www.yelp.com/biz/pedego-electric-bikes-atlanta-atlanta-2?adjust_crea…
## 10 https://www.yelp.com/biz/civil-bikes-atlanta?adjust_creative=aOMz3ElZ4MnHCyS…
## review_count categories rating coordinates$latitude $longitude transactions
## <int> <list> <dbl> <dbl> <dbl> <list>
## 1 15 <df [3 × 2]> 4.5 33.7 -84.4 <list [0]>
## 2 103 <df [3 × 2]> 4 33.8 -84.3 <list [0]>
## 3 128 <df [3 × 2]> 4.5 33.8 -84.4 <list [0]>
## 4 103 <df [3 × 2]> 4 33.8 -84.3 <list [0]>
## 5 15 <df [3 × 2]> 4.5 33.7 -84.4 <list [0]>
## 6 103 <df [3 × 2]> 4 33.8 -84.3 <list [0]>
## 7 103 <df [3 × 2]> 4 33.8 -84.3 <list [0]>
## 8 15 <df [3 × 2]> 4.5 33.7 -84.4 <list [0]>
## 9 15 <df [3 × 2]> 4.5 33.7 -84.4 <list [0]>
## 10 11 <df [2 × 2]> 4.5 33.7 -84.4 <list [0]>
## location$address1 $address2 $address3 $city $zip_code $country $state
## <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 "414 Bill Kennedy Way" "Ste 101" <NA> Atlanta 30316 US GA
## 2 "484 Moreland Ave NE" "Ste E" "" Atlanta 30307 US GA
## 3 "151 Sampson St NE" "" "" Atlanta 30312 US GA
## 4 "484 Moreland Ave NE" "Ste E" "" Atlanta 30307 US GA
## 5 "414 Bill Kennedy Way" "Ste 101" <NA> Atlanta 30316 US GA
## 6 "484 Moreland Ave NE" "Ste E" "" Atlanta 30307 US GA
## 7 "484 Moreland Ave NE" "Ste E" "" Atlanta 30307 US GA
## 8 "414 Bill Kennedy Way" "Ste 101" <NA> Atlanta 30316 US GA
## 9 "414 Bill Kennedy Way" "Ste 101" <NA> Atlanta 30316 US GA
## 10 "" <NA> "" Atlanta 30312 US GA
## $display_address phone display_phone distance price
## <list> <chr> <chr> <dbl> <chr>
## 1 <chr [3]> +14049753915 (404) 975-3915 7824. <NA>
## 2 <chr [3]> +14046884878 (404) 688-4878 557. $$
## 3 <chr [2]> +17708732413 (770) 873-2413 646. $$
## 4 <chr [3]> +14046884878 (404) 688-4878 1276. $$
## 5 <chr [3]> +14049753915 (404) 975-3915 732. <NA>
## 6 <chr [3]> +14046884878 (404) 688-4878 7892. $$
## 7 <chr [3]> +14046884878 (404) 688-4878 11842. $$
## 8 <chr [3]> +14049753915 (404) 975-3915 8187. <NA>
## 9 <chr [3]> +14049753915 (404) 975-3915 1453. <NA>
## 10 <chr [1]> +14043238754 (404) 323-8754 1455. <NA>
## # ℹ 35 more rows
However, even when you’ve stopped using the code for downloading Yelp data, do not delete them from your RMD; instructors need to see the entire code. You can use code chunk option {r, eval=FALSE} to keep the code for downloading Yelp in your RMD but not actually run it. Read this to learn more.
load('bikes.RData')
# Deleting duplicated rows.
drop_duplicates <- function(df, name) {
unique_df <- df %>% distinct(id, .keep_all=T)
print(glue::glue("Before dropping duplicated rows, there were {nrow(df)} rows in {name}. After dropping them, there are {nrow(unique_df)} rows."))
return(unique_df)
}
# Note: The distinct function from the ‘dplyr’ package keeps the first occurrence of each duplicated row and removes the rest.
drop_dup_bikes <- drop_duplicates(yelp_bikes, 'id')
## Before dropping duplicated rows, there were 45 rows in id. After dropping them, there are 9 rows.
# Flattening nested columns that have multiple variables in one column. Pay particular attention to the “category” column.
concate_list <- function(x){
# x is a data frame with columns "alias" and "title" from Yelp$categories
# returns a character vector containing category concatenated titles
titles <- x[["title"]] %>% str_c(collapse = ", ")
return(titles)
}
flatten <- function(df, col){
flat_df <- df %>%
mutate({{col}} := map_chr(.data[[col]], concate_list))
return(flat_df)
}
flat1_bikes <- flatten(drop_dup_bikes, 'categories')
flat2_bikes <- flat1_bikes %>%
jsonlite::flatten() %>%
as_tibble()
flat2_bikes$coordinates.latitude %>% head()
## [1] 33.74172 33.76756 33.75833 33.74468 33.80578 33.78293
# Delete rows that have missing data in coordinates variable and other variables of interest. It’s okay to have NAs in variables that are not of interest.
# Fist, let's verify that the 4 missing values in lat/long columns are in the same rows.
identical(is.na(flat2_bikes$coordinates.latitude),
is.na(flat2_bikes$coordinates.longitude)) # Yes, they are in the same 4 rows.
## [1] TRUE
# Drop them.
dropna1_bikes <- flat2_bikes %>%
drop_na(coordinates.longitude)
# Dropping NAs in price
dropna2_bikes <- dropna1_bikes %>%
drop_na(price)
# Delete rows that fall outside of the City of Atlanta boundary.
sf_bikes <- dropna1_bikes %>%
st_as_sf(coords=c("coordinates.longitude", "coordinates.latitude"), crs = 4326)
# Filter the points
in_atlanta_bikes <- st_intersection(sf_bikes, census_in_atlanta)
## Error in eval(expr, envir, enclos): object 'census_in_atlanta' not found
save(in_atlanta_bikes, file = "in_atlanta_bikes.RData")
## Error in save(in_atlanta_bikes, file = "in_atlanta_bikes.RData"): object 'in_atlanta_bikes' not found
map_eda <- tm_shape(in_atlanta_bikes) + tm_dots(size=0.05, col = "review_count", style="quantile", palette = "Blues")
## Error in eval(expr, envir, enclos): object 'in_atlanta_bikes' not found
map_eda
## Error in eval(expr, envir, enclos): object 'map_eda' not found
# Examine the associations among the variables bike rentals and bike commuting.
skim(in_atlanta_bikes)
## Error in eval(expr, envir, enclos): object 'in_atlanta_bikes' not found
load("in_atlanta_bikes.RData")
load("census_in_atlanta.RData")
census_in_atlanta <- st_set_crs(census_in_atlanta, 4326)
## Warning: st_crs<- : replacing crs does not reproject data; use st_transform for
## that
# proportion
bike_percentage <- census_in_atlanta$bikeM / census_in_atlanta$totalM
census_in_atlanta$bike_percentage <- bike_percentage
# density: bike store/ census tract
indices <- st_intersects(in_atlanta_bikes, census_in_atlanta)
counts <- sapply(indices, length)
areas <- st_area(census_in_atlanta)
density <- counts / as.numeric(areas)
## Warning in counts/as.numeric(areas): longer object length is not a multiple of
## shorter object length
# Add the density to the census_tract data frame as a new column
census_in_atlanta$density <- density
# plot map
bike_store_density_map <- tm_shape(census_in_atlanta)+tm_polygons(col = "density", style="quantile", palette = "Blues")
bike_percentage_map <- tm_shape(census_in_atlanta)+tm_polygons(col = "bike_percentage", style="quantile")
tmap_arrange(bike_store_density_map, bike_percentage_map)
## Legend labels were too wide. The labels have been resized to 0.44, 0.44, 0.44, 0.44, 0.03. Increase legend.width (argument of tm_layout) to make the legend wider and therefore the labels larger.
# plot graph
ggplot(census_in_atlanta, aes(x = density, y = bike_percentage)) +
geom_point() +
labs(x = "Bike Store Density", y = "Bike Commute Percentage", title = "Scatter Plot of Bike Store Density vs Bike Commute Percentage") +
theme_minimal()
# plot graph
ggplot(census_in_atlanta, aes(x = density, y = bike_percentage)) +
geom_point() +
labs(x = "Bike Store Density", y = "Bike Commute Percentage", title = "Scatter Plot of Bike Store Density vs Bike Commute Percentage") +
theme_minimal()