library(tidycensus)
library(sf)
## Linking to GEOS 3.10.2, GDAL 3.4.1, PROJ 7.2.1; sf_use_s2() is TRUE
library(tmap)
library(jsonlite)
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5 v purrr 0.3.4
## v tibble 3.1.4 v dplyr 1.0.7
## v tidyr 1.1.3 v stringr 1.4.0
## v readr 2.0.1 v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x purrr::flatten() masks jsonlite::flatten()
## x dplyr::lag() masks stats::lag()
library(httr)
library(jsonlite)
library(reshape2)
##
## 载入程辑包:'reshape2'
## The following object is masked from 'package:tidyr':
##
## smiths
library(here)
## here() starts at C:/Users/11969/Desktop/Intro to Urban_Analytics/Assignment1
library(yelpr)
library(knitr)
## To install your API key for use in future sessions, run this function with `install = TRUE`.
## To install your API key for use in future sessions, run this function with `install = TRUE`.
Q1:What’s the county and state of your choice? New York, Queens
# Tract polygons for the Yelp query
tract <- suppressMessages(
get_acs(geography = "tract",
state = "NY",
county = c("Queens"),
variables = c(hhincome = 'B19019_001',
race.tot = "B02001_001",
race.white = "B02001_002",
race.black = 'B02001_003'
),
year = 2019,
survey = "acs5",
geometry = TRUE,
output = "wide")
)
# Retaining only what we want
tract <- tract %>%
select(GEOID,
hhincome = hhincomeE,
race.tot = race.totE,
race.white = race.whiteE,
race.black = race.blackE)
tmap_mode("view")
## tmap mode set to interactive viewing
tm_shape(tract) + tm_borders()
# figure out the distance between the centroid
epsg_id<-4326
get_r <- function(poly, epsg_id){
bb <- st_bbox(poly)
bb_corner <- st_point(c(bb[1], bb[2])) %>% st_sfc(crs = epsg_id)
bb_center_x <- (bb[3]+bb[1])/2
bb_center_y <- (bb[4]+bb[2])/2
bb_center <- st_point(c(bb_center_x, bb_center_y)) %>% st_sfc(crs = epsg_id) %>% st_sf()
r <- st_distance(bb_corner, bb_center)
bb_center$radius <- r*1.2
return(bb_center)
}
# Using a loop
r4all_loop <- vector("list", nrow(tract))
for (i in 1:nrow(tract)){
r4all_loop[[i]] <- tract %>%
st_transform(crs = epsg_id) %>%
st_geometry() %>%
.[[i]] %>%
get_r(epsg_id = epsg_id)
}
r4all_loop <- bind_rows(r4all_loop)
# Using a functional
r4all_apply <- tract %>%
st_geometry() %>%
st_transform(crs = epsg_id) %>%
lapply(., function(x) get_r(x, epsg_id = epsg_id))
r4all_apply <- bind_rows(r4all_apply)
identical(r4all_apply, r4all_loop)
## [1] TRUE
# Appending X Y coordinates as separate columns
ready_4_yelp <- r4all_apply %>%
mutate(x = st_coordinates(.)[,1],
y = st_coordinates(.)[,2])
# process automatic function
get_yelp <- function(tract, category){
n <- 1
resp <- business_search(api_key = client_secret,
categories = category,
latitude = tract$y,
longitude = tract$x,
offset = (n - 1) * 50,
radius = round(tract$radius),
limit = 50)
required_n <- ceiling(resp$total/50)
out <- vector("list", required_n)
out[[n]] <- resp$businesses
names(out)[n] <- required_n
if (resp$total >= 1000)
{
print(glue::glue("{n}th row has >= 1000 businesses."))
return(out)
}
else
{
n <- n + 1
while(n <= required_n){
resp <- business_search(api_key = client_secret,
categories = category,
latitude = tract$y,
longitude = tract$x,
offset = (n - 1) * 50,
radius = round(tract$radius),
limit = 50)
out[[n]] <- resp$businesses
n <- n + 1
}
out <- out %>% bind_rows()
return(out)
}
}
# Apply the function for the first Census Tract
yelp_first_tract <- get_yelp(ready_4_yelp[1,], "tattoo,healthmarkets") %>%
as_tibble()
## No encoding supplied: defaulting to UTF-8.
yelp_first_tract %>% print
## # A tibble: 9 x 16
## id alias name image~1 is_cl~2 url revie~3 categ~4 rating coord~5 trans~6
## <chr> <chr> <chr> <chr> <lgl> <chr> <int> <list> <dbl> <dbl> <list>
## 1 iilX~ stud~ Stud~ "https~ FALSE http~ 83 <df> 4.5 40.7 <list>
## 2 wh7t~ tatt~ Tatt~ "https~ FALSE http~ 17 <df> 4.5 40.7 <list>
## 3 UsJN~ kund~ Kund~ "https~ FALSE http~ 19 <df> 4.5 40.7 <list>
## 4 WRwn~ down~ Down~ "https~ FALSE http~ 68 <df> 3 40.8 <list>
## 5 xBDr~ jack~ jack~ "https~ FALSE http~ 8 <df> 4 40.7 <list>
## 6 Y2-d~ craz~ Craz~ "" FALSE http~ 5 <df> 4.5 40.7 <list>
## 7 bBjC~ gnc-~ GNC "https~ FALSE http~ 4 <df> 4 40.7 <list>
## 8 Zfin~ help~ Help~ "https~ FALSE http~ 4 <df> 5 40.7 <list>
## 9 5-f6~ pier~ Pier~ "https~ FALSE http~ 1 <df> 1 40.7 <list>
## # ... with 6 more variables: coordinates$longitude <dbl>, price <chr>,
## # location <df[,8]>, phone <chr>, display_phone <chr>, distance <dbl>, and
## # abbreviated variable names 1: image_url, 2: is_closed, 3: review_count,
## # 4: categories, 5: coordinates$latitude, 6: transactions
# Prepare a collector
yelp_all_list <- vector("list", nrow(ready_4_yelp))
for (row in 1:nrow(ready_4_yelp)){
yelp_all_list[[row]] <- suppressMessages(get_yelp(ready_4_yelp[row,], "tattoo,healthmarkets"))
if (row %% 50 == 0){
print(paste0("Current row: ", row))
}
}
## [1] "Current row: 50"
## [1] "Current row: 100"
## [1] "Current row: 150"
## [1] "Current row: 200"
## [1] "Current row: 250"
## [1] "Current row: 300"
## [1] "Current row: 350"
## [1] "Current row: 400"
## [1] "Current row: 450"
## [1] "Current row: 500"
## [1] "Current row: 550"
## [1] "Current row: 600"
## [1] "Current row: 650"
# Collapsing the list into a data.frame
yelp_all <- yelp_all_list %>% bind_rows() %>% as_tibble()
yelp_all %>% print(width=1000)
## # A tibble: 1,183 x 16
## id alias
## <chr> <chr>
## 1 iilXA7Mo0EPR2MzP6PRN2g studio-316-body-art-jackson-heights
## 2 wh7tZTmjUd674AvEvqEgoA tattoo-alley-jackson-heights
## 3 UsJNi1BfGSMVEq1FeQBYBg kundalinink-jackson-heights
## 4 WRwnj3nLJU8HJimezp69ww downtown-natural-market-jackson-heights
## 5 xBDrWzaaBtYBAqOjxXMbZw jackie-tattoo-queens
## 6 Y2-dtd1eCcb8FnG8ypOVjg crazy-tattoos-jackson-heights
## 7 bBjCeP0X7cFktlptvo0mkw gnc-jackson-heights-2
## 8 Zfino-g_AcmJ6Q8mU1ZEPw help-your-self-new-york-4
## 9 5-f6z0MHL4WSoL4cdpcXEw piercemania-jackson-heights
## 10 TXsBFa4Xpl4DZhGZhCxF8w chiko-tattoos-woodside
## name
## <chr>
## 1 Studio 316 Body Art
## 2 Tattoo Alley
## 3 Kundalinink
## 4 Downtown Natural Market
## 5 jackie tattoo
## 6 Crazy Tattoos
## 7 GNC
## 8 Help Your Self
## 9 Piercemania
## 10 Chiko Tattoos
## image_url
## <chr>
## 1 "https://s3-media1.fl.yelpcdn.com/bphoto/RyeoyaOsSJ3hI_2MHH6iZw/o.jpg"
## 2 "https://s3-media3.fl.yelpcdn.com/bphoto/QnLNIeXzFPwN_GnCw2OvNA/o.jpg"
## 3 "https://s3-media2.fl.yelpcdn.com/bphoto/UQfR8O--RDJHGpRnBEU9nA/o.jpg"
## 4 "https://s3-media1.fl.yelpcdn.com/bphoto/ythVLsOlnR05SO5Lwc3JqA/o.jpg"
## 5 "https://s3-media3.fl.yelpcdn.com/bphoto/nFn4RNDD0ssveRiHdlXwSg/o.jpg"
## 6 ""
## 7 "https://s3-media3.fl.yelpcdn.com/bphoto/xuKMuKclSEc49W5FAY7GMA/o.jpg"
## 8 "https://s3-media1.fl.yelpcdn.com/bphoto/MUlzNuICd3-eLCDls7GSag/o.jpg"
## 9 "https://s3-media2.fl.yelpcdn.com/bphoto/IZutjXsNChs37IplPZLxUA/o.jpg"
## 10 "https://s3-media1.fl.yelpcdn.com/bphoto/-yjL7TwIi_Rx4vyR1_8DtQ/o.jpg"
## is_closed
## <lgl>
## 1 FALSE
## 2 FALSE
## 3 FALSE
## 4 FALSE
## 5 FALSE
## 6 FALSE
## 7 FALSE
## 8 FALSE
## 9 FALSE
## 10 FALSE
## url
## <chr>
## 1 https://www.yelp.com/biz/studio-316-body-art-jackson-heights?adjust_creative~
## 2 https://www.yelp.com/biz/tattoo-alley-jackson-heights?adjust_creative=j2i5Pq~
## 3 https://www.yelp.com/biz/kundalinink-jackson-heights?adjust_creative=j2i5PqJ~
## 4 https://www.yelp.com/biz/downtown-natural-market-jackson-heights?adjust_crea~
## 5 https://www.yelp.com/biz/jackie-tattoo-queens?adjust_creative=j2i5PqJqqn7tHh~
## 6 https://www.yelp.com/biz/crazy-tattoos-jackson-heights?adjust_creative=j2i5P~
## 7 https://www.yelp.com/biz/gnc-jackson-heights-2?adjust_creative=j2i5PqJqqn7tH~
## 8 https://www.yelp.com/biz/help-your-self-new-york-4?adjust_creative=j2i5PqJqq~
## 9 https://www.yelp.com/biz/piercemania-jackson-heights?adjust_creative=j2i5PqJ~
## 10 https://www.yelp.com/biz/chiko-tattoos-woodside?adjust_creative=j2i5PqJqqn7t~
## review_count categories rating coordinates$latitude $longitude transactions
## <int> <list> <dbl> <dbl> <dbl> <list>
## 1 83 <df [2 x 2]> 4.5 40.7 -73.9 <list [0]>
## 2 17 <df [2 x 2]> 4.5 40.7 -73.9 <list [0]>
## 3 19 <df [2 x 2]> 4.5 40.7 -73.9 <list [0]>
## 4 68 <df [3 x 2]> 3 40.8 -73.9 <list [0]>
## 5 8 <df [1 x 2]> 4 40.7 -73.9 <list [0]>
## 6 5 <df [1 x 2]> 4.5 40.7 -73.9 <list [0]>
## 7 4 <df [2 x 2]> 4 40.7 -73.9 <list [0]>
## 8 4 <df [2 x 2]> 5 40.7 -74.0 <list [0]>
## 9 1 <df [2 x 2]> 1 40.7 -73.9 <list [0]>
## 10 2 <df [2 x 2]> 5 40.7 -73.9 <list [0]>
## price location$address1 $address2 $address3 $city $zip_code
## <chr> <chr> <chr> <chr> <chr> <chr>
## 1 $$ 84-22 Roosevelt Ave "Fl 2" "" Jackson Heights 11372
## 2 $$ 84-22 Roosevelt Ave "FL 2" "" Jackson Heights 11372
## 3 $$ 81-09 Roosevelt Ave "Fl 2" "" Jackson Heights 11372
## 4 $$ 8401 37th Ave "" "" Jackson Heights 11372
## 5 <NA> 81-16 Roosevelt Ave "Fl 2" <NA> Queens 11372
## 6 $$ 8207 Roosevelt Ave "" "" Jackson Heights 11372
## 7 $$ 37-21 82Nd Street <NA> <NA> Jackson Heights 11372
## 8 $$ 123 Fourth Ave <NA> "" New York 10003
## 9 <NA> 8422 Roosevelt Av "Fl 2" "" Jackson Heights 11372
## 10 <NA> 39-16 65th St <NA> "" Woodside 11377
## $country $state $display_address phone display_phone distance
## <chr> <chr> <list> <chr> <chr> <dbl>
## 1 US NY <chr [3]> +17188033977 (718) 803-3977 593.
## 2 US NY <chr [3]> +17188033977 (718) 803-3977 593.
## 3 US NY <chr [3]> +17183978723 (718) 397-8723 740.
## 4 US NY <chr [2]> +17184240136 (718) 424-0136 438.
## 5 US NY <chr [3]> +16462440910 (646) 244-0910 752.
## 6 US NY <chr [2]> +13474598978 (347) 459-8978 687.
## 7 US NY <chr [2]> +17184586737 (718) 458-6737 608.
## 8 US NY <chr [2]> +17185763010 (718) 576-3010 9640.
## 9 US NY <chr [3]> +17188033977 (718) 803-3977 593.
## 10 US NY <chr [2]> +13478868339 (347) 886-8339 167.
## # ... with 1,173 more rows
Q2:How many businesses are there in total?## 46
# Extract coordinates
yelp_sf <- yelp_all %>%
mutate(x = .$coordinates$longitude,
y = .$coordinates$latitude) %>%
filter(!is.na(x) & !is.na(y)) %>%
st_as_sf(coords = c("x", "y"), crs = 4326)
Q3:How many businesses are there for each business category?## For tattoo 22. For healthmarkets 24
### count tattoo category ###
total_busi<-unlist(yelp_sf$categories)
total_tatto<-str_detect(total_busi, "tattoo")
sum(total_tatto)
## [1] 677
# count healthmarket category
total_health<-str_detect(total_busi, "healthmarkets")
sum(total_health)
## [1] 507
# Map
tm_shape(yelp_sf) +
tm_dots(col = "review_count", style="quantile")
Q4: Upon visual inspection, can you see any noticeable spatial patterns to the way they are distributed across the county##
Tattoo and healthmarkets are concentrated in the central part of Queens, none of them are in the bay and they are located near the main roads, only a few of them are deep in the plots without roads
(Optional) Are there any other interesting findings? Two ‘tattoos’ API returned different results, I don’t know why