library(tidycensus)
library(sf)
## Linking to GEOS 3.9.1, GDAL 3.3.2, PROJ 7.2.1; sf_use_s2() is TRUE
library(tmap)
library(jsonlite)
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✔ ggplot2 3.3.6 ✔ purrr 0.3.4
## ✔ tibble 3.1.7 ✔ dplyr 1.0.9
## ✔ tidyr 1.2.0 ✔ stringr 1.4.0
## ✔ readr 2.1.2 ✔ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ purrr::flatten() masks jsonlite::flatten()
## ✖ dplyr::lag() masks stats::lag()
library(httr)
library(jsonlite)
library(reshape2)
##
## Attaching package: 'reshape2'
## The following object is masked from 'package:tidyr':
##
## smiths
library(here)
## here() starts at C:/Users/iskim/Dropbox (GaTech)/2022-2023/2022 Fall/CP 8883 Intro to Urban Analytics/UA_module1
# devtools::install_github("OmaymaS/yelpr")
library(yelpr)
library(knitr)
sessionInfo()
## R version 4.2.1 (2022-06-23 ucrt)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 22000)
##
## Matrix products: default
##
## locale:
## [1] LC_COLLATE=English_United States.utf8
## [2] LC_CTYPE=English_United States.utf8
## [3] LC_MONETARY=English_United States.utf8
## [4] LC_NUMERIC=C
## [5] LC_TIME=English_United States.utf8
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] knitr_1.39 yelpr_0.1.0 here_1.0.1 reshape2_1.4.4
## [5] httr_1.4.4 forcats_0.5.1 stringr_1.4.0 dplyr_1.0.9
## [9] purrr_0.3.4 readr_2.1.2 tidyr_1.2.0 tibble_3.1.7
## [13] ggplot2_3.3.6 tidyverse_1.3.1 jsonlite_1.8.0 tmap_3.3-3
## [17] sf_1.0-7 tidycensus_1.2.2
##
## loaded via a namespace (and not attached):
## [1] fs_1.5.2 lubridate_1.8.0 RColorBrewer_1.1-3 rprojroot_2.0.3
## [5] tools_4.2.1 backports_1.4.1 bslib_0.3.1 utf8_1.2.2
## [9] rgdal_1.5-32 R6_2.5.1 KernSmooth_2.23-20 DBI_1.1.3
## [13] colorspace_2.0-3 raster_3.5-21 withr_2.5.0 sp_1.5-0
## [17] tidyselect_1.1.2 leaflet_2.1.1 compiler_4.2.1 leafem_0.2.0
## [21] cli_3.3.0 rvest_1.0.2 xml2_1.3.3 sass_0.4.1
## [25] scales_1.2.0 classInt_0.4-7 proxy_0.4-27 rappdirs_0.3.3
## [29] digest_0.6.29 foreign_0.8-82 rmarkdown_2.14 base64enc_0.1-3
## [33] dichromat_2.0-0.1 pkgconfig_2.0.3 htmltools_0.5.2 dbplyr_2.2.1
## [37] fastmap_1.1.0 htmlwidgets_1.5.4 rlang_1.0.5 readxl_1.4.0
## [41] rstudioapi_0.13 jquerylib_0.1.4 generics_0.1.3 crosstalk_1.2.0
## [45] magrittr_2.0.3 Rcpp_1.0.8.3 munsell_0.5.0 fansi_1.0.3
## [49] abind_1.4-5 lifecycle_1.0.1 terra_1.5-34 stringi_1.7.6
## [53] leafsync_0.1.0 yaml_2.3.5 plyr_1.8.7 tmaptools_3.1-1
## [57] grid_4.2.1 maptools_1.1-4 parallel_4.2.1 crayon_1.5.1
## [61] lattice_0.20-45 stars_0.5-5 haven_2.5.0 hms_1.1.1
## [65] pillar_1.7.0 uuid_1.1-0 codetools_0.2-18 reprex_2.0.1
## [69] XML_3.99-0.10 glue_1.6.2 evaluate_0.15 modelr_0.1.8
## [73] png_0.1-7 vctrs_0.4.1 tzdb_0.3.0 cellranger_1.1.0
## [77] gtable_0.3.0 assertthat_0.2.1 xfun_0.30 lwgeom_0.2-8
## [81] broom_1.0.0 e1071_1.7-11 class_7.3-20 viridisLite_0.4.0
## [85] tigris_1.6.1 units_0.8-0 ellipsis_0.3.2
## Load my Census API key
load(here('data', 'census_api_key.RData')) # Load the API key save in my local machine
census_api_key(census.api.key) %>% suppressMessages() # Activate the key in this R session
rm(census.api.key) # Remove from the environment
## Download tract polygons in Gwinnett
tract <- get_acs(geography = "tract",
state = "GA",
county = c("Gwinnett"),
variables = c(hhincome = "B19019_001",
race.tot = "B02001_001",
race.white = "B02001_002",
race.black = "B02001_003"
),
year = 2019,
survey = "acs5",
geometry = T,
output = "wide"
) %>% suppressMessages()
##
|
| | 0%
|
|= | 1%
|
|== | 2%
|
|== | 3%
|
|=== | 4%
|
|==== | 5%
|
|==== | 6%
|
|===== | 7%
|
|===== | 8%
|
|====== | 8%
|
|====== | 9%
|
|======= | 10%
|
|======== | 11%
|
|======== | 12%
|
|========= | 12%
|
|========= | 13%
|
|========== | 14%
|
|========== | 15%
|
|=========== | 16%
|
|============ | 17%
|
|============ | 18%
|
|============= | 19%
|
|============== | 20%
|
|=============== | 22%
|
|================ | 23%
|
|================= | 24%
|
|================== | 26%
|
|=================== | 27%
|
|==================== | 29%
|
|===================== | 30%
|
|====================== | 31%
|
|======================= | 33%
|
|======================== | 34%
|
|========================= | 35%
|
|========================== | 37%
|
|=========================== | 38%
|
|============================ | 39%
|
|============================ | 41%
|
|============================= | 42%
|
|============================== | 43%
|
|=============================== | 45%
|
|================================ | 46%
|
|================================= | 47%
|
|================================== | 49%
|
|=================================== | 50%
|
|==================================== | 51%
|
|==================================== | 52%
|
|===================================== | 53%
|
|====================================== | 54%
|
|======================================= | 56%
|
|======================================== | 57%
|
|========================================= | 58%
|
|========================================== | 60%
|
|=========================================== | 61%
|
|============================================ | 62%
|
|============================================= | 64%
|
|============================================== | 65%
|
|============================================== | 66%
|
|=============================================== | 68%
|
|================================================ | 69%
|
|================================================= | 70%
|
|================================================== | 72%
|
|=================================================== | 73%
|
|==================================================== | 75%
|
|===================================================== | 76%
|
|====================================================== | 77%
|
|======================================================= | 79%
|
|======================================================== | 80%
|
|========================================================= | 81%
|
|========================================================== | 82%
|
|========================================================== | 83%
|
|=========================================================== | 84%
|
|============================================================ | 85%
|
|============================================================= | 87%
|
|============================================================== | 88%
|
|=============================================================== | 89%
|
|================================================================ | 91%
|
|================================================================ | 92%
|
|================================================================= | 93%
|
|================================================================== | 95%
|
|=================================================================== | 96%
|
|==================================================================== | 98%
|
|===================================================================== | 99%
|
|======================================================================| 100%
## View the data
message(sprintf("Number of rows: %s, Number of columns: %s", nrow(tract), ncol(tract)))
## Number of rows: 113, Number of columns: 11
tract %>%
head() %>%
knitr::kable() # neatly displaying tables on HTML document
| GEOID | NAME | hhincomeE | hhincomeM | race.totE | race.totM | race.whiteE | race.whiteM | race.blackE | race.blackM | geometry |
|---|---|---|---|---|---|---|---|---|---|---|
| 13135050436 | Census Tract 504.36, Gwinnett County, Georgia | 47200 | 8508 | 8635 | 1014 | 1888 | 658 | 2819 | 776 | MULTIPOLYGON (((-84.13236 3… |
| 13135050321 | Census Tract 503.21, Gwinnett County, Georgia | 143875 | 19938 | 4862 | 291 | 4174 | 269 | 78 | 74 | MULTIPOLYGON (((-84.26345 3… |
| 13135050419 | Census Tract 504.19, Gwinnett County, Georgia | 51930 | 11367 | 7885 | 905 | 2475 | 682 | 1376 | 423 | MULTIPOLYGON (((-84.20525 3… |
| 13135050524 | Census Tract 505.24, Gwinnett County, Georgia | 53594 | 8422 | 6952 | 685 | 3537 | 800 | 1198 | 393 | MULTIPOLYGON (((-84.1223 33… |
| 13135050313 | Census Tract 503.13, Gwinnett County, Georgia | 51794 | 8679 | 9279 | 1055 | 3657 | 1078 | 1862 | 334 | MULTIPOLYGON (((-84.20358 3… |
| 13135050318 | Census Tract 503.18, Gwinnett County, Georgia | 47321 | 7743 | 2807 | 492 | 1007 | 393 | 1457 | 445 | MULTIPOLYGON (((-84.25477 3… |
## Retain only those with estimates and rename them
tract <- tract %>%
select(GEOID,
hhincome = hhincomeE,
race.tot = race.totE,
race.white = race.whiteE,
race.black = race.blackE)
## Map the data
# Set tmap mode to interactive viewing
tmap_mode('view') %>% suppressMessages()
# Print out
tm_shape(tract) + tm_borders()
## Define a function that outputs the center of the bounding box of a polygon and the radius of a circle encompassing the bounding box tightly
# Function: Get tract-wise radius
get_r <- function(poly, epsg_id) {
#---------------------------------------------------------------------------------------------#
# Takes: a single POLYGON or LINESTRING #
# Outputs: distance between the centroid of the bounding box and a corner of the bounding box #
#---------------------------------------------------------------------------------------------#
# Get bounding box of a given polygon
bb <- st_bbox(poly)
# Get lat & long coordinates of any one corner of the bounding box
bb_corner <- st_point(c(bb[1], bb[2])) %>% st_sfc(crs = epsg_id)
# Get centroid of the bb
bb_center_x <- (bb[3]+bb[1])/2
bb_center_y <- (bb[4]+bb[2])/2
bb_center <- st_point(c(bb_center_x, bb_center_y)) %>%
st_sfc(crs = epsg_id) %>%
st_sf()
# Get the distance between bb_corner and bb_center
r <- st_distance(bb_corner, bb_center)
# Multiply 1.2 to make the circle a bit larger than the Census Tract.
# See the Yelp explanation of their radius parameter to see why we do this.
bb_center$radius <- r*1.2
return(bb_center)
}
## U0se a functional (lapply) to apply this custom function to each Census Tract.
epsg_id <- 4326 # WGS 84 (World Geodetic System 1984) used in GPS
r4all_apply <- tract %>%
st_geometry() %>%
st_transform(crs = epsg_id) %>%
lapply(., function(x) get_r(x, epsg_id = epsg_id))
r4all_apply <- bind_rows(r4all_apply)
## Appending XY coordinates as separate columns
ready_4_yelp <- r4all_apply %>%
mutate(x = st_coordinates(.)[, 1],
y = st_coordinates(.)[, 2])
## Visualize the results
# Activate the interactive mode
tmap_mode('view') %>% suppressMessages()
# Display data (the first 10 rows)
ready_4_yelp[1:10, ] %>%
st_buffer(., dist = .$radius) %>%
tm_shape(.) + tm_polygons(alpha = 0.5, col = 'red') +
tm_shape(tract[1:10, ]) + tm_borders(col = 'blue')
## Load my Yelp API key
load(here('data', 'yelp_api_key.RData')) # Load the API key save in my local machine
## Define a function that downloads all businesses of a selected category in a Census tract
get_yelp <- function(tract, category){
# ---------------------------------------------------------------- #
# Gets one row of tract information (1,) and category name (str), #
# Outputs a list of business data.frame #
# ---------------------------------------------------------------- #
n <- 1
# First request ------------------------------------------------
resp <- business_search(api_key = yelp.api.key,
categories = category,
latitude = tract$y,
longitude = tract$x,
offset = (n - 1) * 50,
radius = round(tract$radius),
limit = 50)
# Calculate how many requests are needed in total
required_n <- ceiling(resp$total/50)
# "out" is where the results will be appended to
out <- vector("list", required_n)
# Store the business information to n-th slot in "out"
out[[n]] <- resp$businesses
# Change the name of the elements to the total required_n
# This is to know if there are more than 1000 businesses, we know how many
names(out)[n] <- required_n
# Throw error if more than 1000
if (resp$total >= 1000)
{
# Glue formats string by inserting {n} with what's currently stored in object n
print(glue::glue("{n}th row has more than 1000 businesses."))
# Stop before going into the loop because we need to break down Census Tract to something smaller
return(out)
}
else
{
# Add 1 to n
n <- n + 1
# Now we know required_n -----------------------------------------------------
# Starting a loop
while(n <= required_n){
resp <- business_search(api_key = yelp.api.key,
categories = category,
latitude = tract$y,
longitude = tract$x,
offset = (n - 1) * 50,
radius = round(tract$radius),
limit = 50)
out[[n]] <- resp$businesses
n <- n + 1
}
# Merge all elements in the list into a single data frame
out <- out %>% bind_rows()
return(out)
}
}
## Prepare a collector
yelp_all_list_hair <- vector("list", nrow(ready_4_yelp))
## Looping through all Census Tracts
for (row in 1:nrow(ready_4_yelp)){
yelp_all_list_hair[[row]] <- suppressMessages(get_yelp(ready_4_yelp[row,], "hair"))
if (row %% 20 == 0){
print(paste0("Current row: ", row))
}
}
## [1] "Current row: 20"
## [1] "Current row: 40"
## [1] "Current row: 60"
## [1] "Current row: 80"
## [1] "Current row: 100"
## Prepare a collector
yelp_all_list_bank <- vector("list", nrow(ready_4_yelp))
## Looping through all Census Tracts
for (row in 1:nrow(ready_4_yelp)){
yelp_all_list_bank[[row]] <- suppressMessages(get_yelp(ready_4_yelp[row,], "banks"))
if (row %% 20 == 0){
print(paste0("Current row: ", row))
}
}
## [1] "Current row: 20"
## Warning: Outer names are only allowed for unnamed scalar atomic inputs
## Warning: Outer names are only allowed for unnamed scalar atomic inputs
## [1] "Current row: 40"
## [1] "Current row: 60"
## [1] "Current row: 80"
## Warning: Outer names are only allowed for unnamed scalar atomic inputs
## [1] "Current row: 100"
## Warning: Outer names are only allowed for unnamed scalar atomic inputs
## Collapsing the list into a data.frame
yelp_all_hair <- yelp_all_list_hair %>% bind_rows() %>% as_tibble() %>% mutate(Category = 'Hair Salons')
yelp_all_bank <- yelp_all_list_bank %>% bind_rows() %>% as_tibble() %>% mutate(Category = 'Banks & Credit Unions')
yelp_all <- bind_rows(yelp_all_hair, yelp_all_bank)
## Print
yelp_all %>% print(width=1000)
## # A tibble: 5,059 × 17
## id alias
## <chr> <chr>
## 1 nq4BI-LPC565xEUVITlUtw salon-supreme-and-braiding-lilburn
## 2 tycA5BGsoXC3kIF0cDberA marrowy-threading-wax-and-hair-lawrenceville
## 3 sqw62Y2FYGgFmsCZTkQAzw sl-braiding-and-beauty-house-lilburn-2
## 4 Qg5l_0eNjp6V1-aDwUfcJQ k-and-t-hair-salon-lawrenceville
## 5 vtfHPJWUNCUGjI9nPeBOhA formal-faces-atlanta
## 6 fiGDS1b-RO42xMGu6kr6Ag hair-precise-lilburn
## 7 3roxcGB9YeDyvgyc5pzYcA zapiens-salon-atlanta
## 8 fS93bC5lGqM4b2Fi27cmsQ salinas-salon-and-barber-lilburn
## 9 qH7a-awrwjjcUujhs_RXUQ salon-nine-seven-lilburn
## 10 ACkJmVqajhgmvhPBeBy1_A new-beauty-hair-and-nails-spa-lawrenceville
## name
## <chr>
## 1 Salon Supreme And Braiding
## 2 Marrowy Threading Wax & Hair
## 3 SL Braiding & Beauty House
## 4 K & T Hair Salon
## 5 Formal Faces
## 6 Hair Precise
## 7 Zapien's Salon - Atlanta
## 8 Salinas Salon & Barber
## 9 Salon Nine Seven
## 10 New Beauty Hair and Nails Spa
## image_url
## <chr>
## 1 "https://s3-media4.fl.yelpcdn.com/bphoto/k86Dj7K6wv2BHSYofmxRmw/o.jpg"
## 2 "https://s3-media1.fl.yelpcdn.com/bphoto/FuNc44lWBJKbfZ_VBiQgzg/o.jpg"
## 3 "https://s3-media3.fl.yelpcdn.com/bphoto/4mCNehcC7rijkVVgQuAZ1Q/o.jpg"
## 4 ""
## 5 "https://s3-media4.fl.yelpcdn.com/bphoto/tdWW4LWElUBd0T2rmU2gFw/o.jpg"
## 6 "https://s3-media2.fl.yelpcdn.com/bphoto/fwpO7Yxs9iRVnxSYRtmr9w/o.jpg"
## 7 "https://s3-media3.fl.yelpcdn.com/bphoto/8pAf8xBpKCuWMZOjqWL7DA/o.jpg"
## 8 "https://s3-media2.fl.yelpcdn.com/bphoto/2oy9koagrp598-TluUzQTA/o.jpg"
## 9 "https://s3-media2.fl.yelpcdn.com/bphoto/Lzl5gqtH8n9xEh5Fn_l6Ew/o.jpg"
## 10 "https://s3-media2.fl.yelpcdn.com/bphoto/sExChwoHgU6FV0Em8hioOQ/o.jpg"
## is_closed
## <lgl>
## 1 FALSE
## 2 FALSE
## 3 FALSE
## 4 FALSE
## 5 FALSE
## 6 FALSE
## 7 FALSE
## 8 FALSE
## 9 FALSE
## 10 FALSE
## url
## <chr>
## 1 https://www.yelp.com/biz/salon-supreme-and-braiding-lilburn?adjust_creative=…
## 2 https://www.yelp.com/biz/marrowy-threading-wax-and-hair-lawrenceville?adjust…
## 3 https://www.yelp.com/biz/sl-braiding-and-beauty-house-lilburn-2?adjust_creat…
## 4 https://www.yelp.com/biz/k-and-t-hair-salon-lawrenceville?adjust_creative=wA…
## 5 https://www.yelp.com/biz/formal-faces-atlanta?adjust_creative=wA5TIrpKGJmySk…
## 6 https://www.yelp.com/biz/hair-precise-lilburn?adjust_creative=wA5TIrpKGJmySk…
## 7 https://www.yelp.com/biz/zapiens-salon-atlanta?adjust_creative=wA5TIrpKGJmyS…
## 8 https://www.yelp.com/biz/salinas-salon-and-barber-lilburn?adjust_creative=wA…
## 9 https://www.yelp.com/biz/salon-nine-seven-lilburn?adjust_creative=wA5TIrpKGJ…
## 10 https://www.yelp.com/biz/new-beauty-hair-and-nails-spa-lawrenceville?adjust_…
## review_count categories rating coordinates$latitude $longitude transactions
## <int> <list> <dbl> <dbl> <dbl> <list>
## 1 7 <df [3 × 2]> 4 33.9 -84.1 <list [0]>
## 2 5 <df [3 × 2]> 5 33.9 -84.1 <list [0]>
## 3 20 <df [3 × 2]> 3 33.9 -84.1 <list [0]>
## 4 3 <df [1 × 2]> 4 33.9 -84.1 <list [0]>
## 5 54 <df [3 × 2]> 5 33.8 -84.4 <list [0]>
## 6 6 <df [1 × 2]> 3.5 33.9 -84.1 <list [0]>
## 7 34 <df [2 × 2]> 4.5 33.8 -84.4 <list [0]>
## 8 2 <df [1 × 2]> 5 33.9 -84.1 <list [0]>
## 9 3 <df [1 × 2]> 5 33.9 -84.1 <list [0]>
## 10 4 <df [2 × 2]> 4 33.9 -84.1 <list [0]>
## price location$address1 $address2 $address3 $city
## <chr> <chr> <chr> <chr> <chr>
## 1 $$ "550 Pleasant Hill Rd" "Ste B208" "" Lilburn
## 2 <NA> "3059 Lawrenceville Hwy" "Ste C" "" Lawrenceville
## 3 <NA> "4485 Lawrenceville Hwy NW" "Ste 210" "" Lilburn
## 4 $ "3870 Lawrenceville Hwy" "Ste C108" <NA> Lawrenceville
## 5 $$ "" "" "" Atlanta
## 6 $ "4153 Lawrenceville Hwy NW" "Ste 6" "" Lilburn
## 7 $$ "3209 Paces Ferry Pl" "Ste 2" "" Atlanta
## 8 <NA> "327 Arcado Rd" "" <NA> Lilburn
## 9 <NA> "97 Main St" <NA> "" Lilburn
## 10 $ "2785 Cruse Rd NW" <NA> "St 3" Lawrenceville
## $zip_code $country $state $display_address phone display_phone
## <chr> <chr> <chr> <list> <chr> <chr>
## 1 30047 US GA <chr [3]> +14705145300 (470) 514-5300
## 2 30044 US GA <chr [3]> +14049903353 (404) 990-3353
## 3 30047 US GA <chr [3]> +14049339924 (404) 933-9924
## 4 30044 US GA <chr [3]> +17708069181 (770) 806-9181
## 5 30303 US GA <chr [1]> +17706827627 (770) 682-7627
## 6 30047 US GA <chr [3]> +17709230650 (770) 923-0650
## 7 30305 US GA <chr [3]> +14042312040 (404) 231-2040
## 8 30047 US GA <chr [2]> +14705097181 (470) 509-7181
## 9 30047 US GA <chr [2]> +16786948384 (678) 694-8384
## 10 30044 US GA <chr [3]> +16786911863 (678) 691-1863
## distance Category
## <dbl> <chr>
## 1 424. Hair Salons
## 2 2684. Hair Salons
## 3 2187. Hair Salons
## 4 392. Hair Salons
## 5 30866. Hair Salons
## 6 1036. Hair Salons
## 7 26124. Hair Salons
## 8 1397. Hair Salons
## 9 3480. Hair Salons
## 10 3679. Hair Salons
## # … with 5,049 more rows
## Extract coordinates
yelp_sf <- yelp_all %>%
mutate(x = .$coordinates$longitude,
y = .$coordinates$latitude) %>%
filter(!is.na(x) & !is.na(y)) %>%
st_as_sf(coords = c("x", "y"), crs = 4326)
## Map
tm_shape(yelp_sf) +
tm_dots(col = "Category") +
tm_layout()
## Save outputs
write_rds(yelp_all_bank, file = here('data', 'yelp_all_bank_mini_assignment1.rds'))
write_rds(yelp_all_hair, file = here('data', 'yelp_all_hair_mini_assignment1.rds'))
write_rds(yelp_all, file = here('data', 'yelp_all_mini_assignment1.rds'))
write_rds(yelp_sf, file = here('data', 'yelp_sf_mini_assignment1.rds'))
What’s the county and state of your choice?
Gwinnett County, Georgia
How many businesses are there in total?
5059 businesses in total
How many businesses are there for each business category?
Hair Salons: 4230
Banks: 829
Upon visual inspection, can you see any noticeable spatial
patterns to the way they are distributed across the county (e.g.,
clustering of businesses at some parts of the county)?
Both types of businesses usually are located near arterial roads
rather than near local or residential roads, which is more prominent for
banks than for hair salons. Also, businesses are more concentrated in
the western side of the county. This makes sense considering Census
tracts are much smaller on the western side, indicating a higher
population density.
(Optional) Are there any other interesting findings?
I did not expect this much number of hair salons. I expected a
slightly higher number of hair salons than that of banks. This large
number of businesses under the category of “hair” (the category code for
“hair salons”) seems to stem from the fact that this category includes
businesses of “Blow Dry/Out Services”, “Hair Extensions”, “Hair
Stylists”, “Kids Hair Salons”, and “Men’s Hair Salons”
categories.