##Libraries
library(tidycensus)
library(sf)
## Linking to GEOS 3.9.1, GDAL 3.4.3, PROJ 7.2.1; sf_use_s2() is TRUE
library(tmap)
library(jsonlite)
library(tidyverse)
## ── Attaching packages
## ───────────────────────────────────────
## tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6 ✔ purrr 0.3.4
## ✔ tibble 3.1.8 ✔ dplyr 1.0.9
## ✔ tidyr 1.2.0 ✔ stringr 1.4.0
## ✔ readr 2.1.2 ✔ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ purrr::flatten() masks jsonlite::flatten()
## ✖ dplyr::lag() masks stats::lag()
library(httr)
library(jsonlite)
library(reshape2)
##
## Attaching package: 'reshape2'
##
## The following object is masked from 'package:tidyr':
##
## smiths
library(here)
## here() starts at C:/Users/fhasan30/OneDrive - Georgia Institute of Technology/Documents/CP 8883
library(yelpr)
library(knitr)
#census api
a <- census_api_key(Sys.getenv("census_api"))
## To install your API key for use in future sessions, run this function with `install = TRUE`.
b <- Sys.getenv("census_api")
#collin county TX census tract boundary
tract <- suppressMessages(
get_acs(geography = "tract", # or "block group", "county", "state" etc.
state = "TX",
county = c("collin county"),
variables = c(hhincome = 'B19019_001',
race.tot = "B02001_001",
race.white = "B02001_002",
race.black = 'B02001_003'
),
year = 2019,
survey = "acs5", # American Community Survey 5-year estimate
geometry = TRUE, # returns sf objects
output = "wide") # wide vs. long
)
##
|
| | 0%
|
| | 1%
|
|= | 1%
|
|= | 2%
|
|== | 2%
|
|== | 3%
|
|=== | 4%
|
|==== | 5%
|
|==== | 6%
|
|===== | 7%
|
|===== | 8%
|
|====== | 8%
|
|====== | 9%
|
|======= | 9%
|
|======= | 10%
|
|======== | 11%
|
|======== | 12%
|
|========= | 12%
|
|========= | 14%
|
|========== | 14%
|
|========== | 15%
|
|=========== | 15%
|
|=========== | 16%
|
|============ | 17%
|
|============= | 18%
|
|============= | 19%
|
|============== | 20%
|
|============== | 21%
|
|=============== | 21%
|
|================ | 22%
|
|================ | 23%
|
|================= | 24%
|
|================= | 25%
|
|================== | 26%
|
|=================== | 27%
|
|==================== | 28%
|
|==================== | 29%
|
|===================== | 30%
|
|====================== | 31%
|
|====================== | 32%
|
|======================= | 32%
|
|======================= | 33%
|
|======================== | 34%
|
|======================== | 35%
|
|========================= | 36%
|
|========================== | 37%
|
|========================== | 38%
|
|=========================== | 38%
|
|=========================== | 39%
|
|============================ | 39%
|
|============================ | 40%
|
|============================= | 41%
|
|============================= | 42%
|
|============================== | 42%
|
|============================== | 43%
|
|=============================== | 44%
|
|================================ | 45%
|
|================================ | 46%
|
|================================= | 47%
|
|================================== | 48%
|
|================================== | 49%
|
|=================================== | 50%
|
|==================================== | 51%
|
|==================================== | 52%
|
|===================================== | 52%
|
|===================================== | 53%
|
|===================================== | 54%
|
|====================================== | 54%
|
|====================================== | 55%
|
|======================================= | 56%
|
|======================================== | 57%
|
|======================================== | 58%
|
|========================================= | 58%
|
|========================================= | 59%
|
|========================================== | 60%
|
|=========================================== | 61%
|
|=========================================== | 62%
|
|============================================ | 62%
|
|============================================ | 63%
|
|============================================= | 64%
|
|============================================= | 65%
|
|============================================== | 66%
|
|=============================================== | 67%
|
|=============================================== | 68%
|
|================================================ | 68%
|
|================================================ | 69%
|
|================================================= | 69%
|
|================================================= | 70%
|
|================================================== | 71%
|
|=================================================== | 72%
|
|=================================================== | 73%
|
|==================================================== | 74%
|
|===================================================== | 75%
|
|===================================================== | 76%
|
|====================================================== | 77%
|
|====================================================== | 78%
|
|======================================================= | 78%
|
|======================================================= | 79%
|
|======================================================== | 80%
|
|========================================================= | 82%
|
|=========================================================== | 84%
|
|============================================================ | 85%
|
|============================================================= | 87%
|
|============================================================== | 89%
|
|=============================================================== | 89%
|
|=============================================================== | 90%
|
|================================================================= | 92%
|
|================================================================== | 94%
|
|=================================================================== | 96%
|
|==================================================================== | 97%
|
|===================================================================== | 98%
|
|===================================================================== | 99%
|
|======================================================================| 99%
|
|======================================================================| 100%
#only keeping E collumns
tract <- tract %>%
select(GEOID,
hhincome = hhincomeE, # New name = old name
race.tot = race.totE,
race.white = race.whiteE,
race.black = race.blackE)
#view map
tmap_mode("view")
## tmap mode set to interactive viewing
tm_shape(tract) + tm_borders()
# Function: Get tract-wise radius
get_r <- function(poly, epsg_id){
bb <- st_bbox(poly) #bounding box for polygon
bb_corner <- st_point(c(bb[1], bb[2])) %>% st_sfc(crs = epsg_id)#lat and long
#centroid of the bounding box
bb_center_x <- (bb[3]+bb[1])/2
bb_center_y <- (bb[4]+bb[2])/2
bb_center <- st_point(c(bb_center_x, bb_center_y)) %>% st_sfc(crs = epsg_id) %>% st_sf()
r <- st_distance(bb_corner, bb_center) #finding distance
bb_center$radius <- r*1.2 # making circle a little bigger than the census tract
return(bb_center)
}
epsg_id <- 4326 #measures distance in meters
#empty vector
r4all_loop <- vector("list", nrow(tract))
# for loop adding info to vector
for (i in 1:nrow(tract)){
r4all_loop[[i]] <- tract %>%
st_transform(crs = epsg_id) %>%
st_geometry() %>%
.[[i]] %>%
get_r(epsg_id = epsg_id)
}
r4all_loop <- bind_rows(r4all_loop)
#applying this fucntion to each cencus tract
r4all_apply <- tract %>%
st_geometry() %>%
st_transform(crs = epsg_id) %>%
lapply(., function(x) get_r(x, epsg_id = epsg_id))
r4all_apply <- bind_rows(r4all_apply)
# identical?
identical(r4all_apply, r4all_loop)
## [1] TRUE
# putting xy coordinates in two columns
ready_4_yelp <- r4all_apply %>%
mutate(x = st_coordinates(.)[,1],
y = st_coordinates(.)[,2])
tmap_mode('view')
## tmap mode set to interactive viewing
ready_4_yelp[1:10,] %>%
# Draw a buffer
st_buffer(., dist = .$radius) %>%
# buffer shown in red
tm_shape(.) + tm_polygons(alpha = 0.5, col = 'red') +
# original polygon shown in blue
tm_shape(tract[1:10,]) + tm_borders(col= 'blue')
#accessing Yelp API
api_key = Sys.getenv("yelp_api")
which_tract <- 1
#getting park data
parks = business_search(api_key = api_key,
category = 'Parks' , # find hopitals
latitude = ready_4_yelp$y[which_tract],
longitude = ready_4_yelp$x[which_tract],
offset = 0, # 1st page, 1st obs
radius = round(ready_4_yelp$radius[which_tract]), # radius requires integer value
limit = 50) # how many business per page
## No encoding supplied: defaulting to UTF-8.
#getting restraunt data
resteraunts = business_search(api_key = api_key,
category = 'resteraunt' , # find hopitals
latitude = ready_4_yelp$y[which_tract],
longitude = ready_4_yelp$x[which_tract],
offset = 0, # 1st page, 1st obs
radius = round(ready_4_yelp$radius[which_tract]), # radius requires integer value
limit = 50) # how many business per page
## No encoding supplied: defaulting to UTF-8.
#combine both catagories
buisness = bind_rows(parks,resteraunts)
# search for parks and resteraunts within a certain radius, limit to 50 business per page
test <- business_search(api_key = api_key,
category = 'buisness' ,
latitude = ready_4_yelp$y[which_tract],
longitude = ready_4_yelp$x[which_tract],
offset = 0, # 1st page, 1st obs
radius = round(ready_4_yelp$radius[which_tract]),
limit = 50)
## No encoding supplied: defaulting to UTF-8.
lapply(test, head)
## $businesses
## id alias
## 1 NJv8BeJRTYYQ9Yz2T23JaA densetsu-plano
## 2 d0dcwS7yajl_HGJf-z5NqA saucys-thai-and-pho-plano-plano
## 3 p-f1iTBbupSun6mEbsDECA covinos-pasta-and-pizza-plano
## 4 1rhWjKvf8MYEwrjbxflp1w bull-daddy-noodle-bistro-plano
## 5 4mPHnuj9Cy1wBWW1FI_ziw keds-ice-cream-waffles-and-crepes-plano
## 6 TFmrYQuUDb8p6tf-6V-Vbw lilys-cafe-plano
## name
## 1 Densetsu
## 2 Saucy's Thai & Pho - Plano
## 3 Covino's Pasta & Pizza
## 4 Bull Daddy Noodle Bistro
## 5 Ked's Ice Cream, Waffles & Crepes
## 6 Lily's Cafe
## image_url
## 1 https://s3-media4.fl.yelpcdn.com/bphoto/UNKO241hxzWv49N5dAvgtA/o.jpg
## 2 https://s3-media1.fl.yelpcdn.com/bphoto/ifJZbs408x6MkEC8Du7dew/o.jpg
## 3 https://s3-media3.fl.yelpcdn.com/bphoto/kd_fbh7Aq3LigJ_7HGnL3g/o.jpg
## 4 https://s3-media3.fl.yelpcdn.com/bphoto/zaDRBqL3OPC6v0bp9GMIrg/o.jpg
## 5 https://s3-media3.fl.yelpcdn.com/bphoto/zXyFbiBustWfvu8TlD1LfQ/o.jpg
## 6 https://s3-media3.fl.yelpcdn.com/bphoto/zmxXSG8GtQIfs9J_mgYYuQ/o.jpg
## is_closed
## 1 FALSE
## 2 FALSE
## 3 FALSE
## 4 FALSE
## 5 FALSE
## 6 FALSE
## url
## 1 https://www.yelp.com/biz/densetsu-plano?adjust_creative=bYB2eEjjKRvYZCw5ZxM2xQ&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=bYB2eEjjKRvYZCw5ZxM2xQ
## 2 https://www.yelp.com/biz/saucys-thai-and-pho-plano-plano?adjust_creative=bYB2eEjjKRvYZCw5ZxM2xQ&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=bYB2eEjjKRvYZCw5ZxM2xQ
## 3 https://www.yelp.com/biz/covinos-pasta-and-pizza-plano?adjust_creative=bYB2eEjjKRvYZCw5ZxM2xQ&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=bYB2eEjjKRvYZCw5ZxM2xQ
## 4 https://www.yelp.com/biz/bull-daddy-noodle-bistro-plano?adjust_creative=bYB2eEjjKRvYZCw5ZxM2xQ&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=bYB2eEjjKRvYZCw5ZxM2xQ
## 5 https://www.yelp.com/biz/keds-ice-cream-waffles-and-crepes-plano?adjust_creative=bYB2eEjjKRvYZCw5ZxM2xQ&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=bYB2eEjjKRvYZCw5ZxM2xQ
## 6 https://www.yelp.com/biz/lilys-cafe-plano?adjust_creative=bYB2eEjjKRvYZCw5ZxM2xQ&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=bYB2eEjjKRvYZCw5ZxM2xQ
## review_count
## 1 1462
## 2 477
## 3 258
## 4 322
## 5 297
## 6 218
## categories
## 1 japanese, sushi, Japanese, Sushi Bars
## 2 thai, Thai
## 3 pizza, italian, Pizza, Italian
## 4 taiwanese, chinese, noodles, Taiwanese, Chinese, Noodles
## 5 icecream, desserts, Ice Cream & Frozen Yogurt, Desserts
## 6 tradamerican, breakfast_brunch, tex-mex, American (Traditional), Breakfast & Brunch, Tex-Mex
## rating coordinates.latitude coordinates.longitude transactions price
## 1 4.0 33.05551 -96.77257 pickup, delivery $$
## 2 4.5 33.05562 -96.77307 pickup, delivery $$
## 3 4.0 33.03988 -96.75585 delivery $$
## 4 4.0 33.04258 -96.77053 pickup, delivery $$
## 5 4.0 33.05704 -96.77196 delivery $$
## 6 4.5 33.04060 -96.75070 pickup, delivery $
## location.address1 location.address2 location.address3 location.city
## 1 4152 W Spring Creek Pkwy Ste 176 Plano
## 2 4152 W Spring Creek Pkwy <NA> Plano
## 3 3265 Independence Pkwy Plano
## 4 3309 Coit Rd Ste 100 Plano
## 5 6205 Coit Rd Ste 344 Plano
## 6 3100 Independence Pkwy Ste 299 Plano
## location.zip_code location.country location.state
## 1 75024 US TX
## 2 75024 US TX
## 3 75075 US TX
## 4 75093 US TX
## 5 75024 US TX
## 6 75075 US TX
## location.display_address phone
## 1 4152 W Spring Creek Pkwy, Ste 176, Plano, TX 75024 +19729647875
## 2 4152 W Spring Creek Pkwy, Plano, TX 75024 +19723887650
## 3 3265 Independence Pkwy, Plano, TX 75075 +19725190345
## 4 3309 Coit Rd, Ste 100, Plano, TX 75093 +19725191088
## 5 6205 Coit Rd, Ste 344, Plano, TX 75024 +19726730285
## 6 3100 Independence Pkwy, Ste 299, Plano, TX 75075 +19723129732
## display_phone distance
## 1 (972) 964-7875 1327.913
## 2 (972) 388-7650 1355.599
## 3 (972) 519-0345 1118.412
## 4 (972) 519-1088 1187.192
## 5 (972) 673-0285 1382.249
## 6 (972) 312-9732 1318.830
##
## $total
## [1] 72
##
## $region
## $region$center
## $region$center$longitude
## [1] -96.7605
##
## $region$center$latitude
## [1] 33.04915
names(test)
## [1] "businesses" "total" "region"
paste0("is it a data.frame?: ", is.data.frame(test$businesses), ", ",
" how many rows?: ", nrow(test$businesses), ", ",
" how many columns?: ", ncol(test$businesses))
## [1] "is it a data.frame?: TRUE, how many rows?: 50, how many columns?: 16"
# FUNCTION
get_yelp <- function(tract, category){
n <- 1
# First request --------------------------------------------------------------
resp <- business_search(api_key = Sys.getenv("yelp_api"),
categories = category,
latitude = tract$y,
longitude = tract$x,
offset = (n - 1) * 50, # = 0 when n = 1
radius = round(tract$radius),
limit = 50)
# total requests
required_n <- ceiling(resp$total/50)
# results appended to out
out <- vector("list", required_n)
# Store the business information to nth slot in out
out[[n]] <- resp$businesses
# Change the name of the elements to the total required_n
# This is to know if there are more than 1000 businesses,
# we know how many.
names(out)[n] <- required_n
# Throw error if more than 1000
if (resp$total >= 1000)
{
# glue formats string by inserting {n} with what's currently stored in object n.
print(glue::glue("{n}th row has >= 1000 businesses."))
# Stop before going into the loop because we need to
# break down Census Tract to something smaller.
return(out)
}
else
{
# add 1 to n
n <- n + 1
# Starting a loop
while(n <= required_n){
resp <- business_search(api_key = Sys.getenv("yelp_api"),
categories = category,
latitude = tract$y,
longitude = tract$x,
offset = (n - 1) * 50,
radius = round(tract$radius),
limit = 50)
out[[n]] <- resp$businesses
n <- n + 1
}
# Merge all elements in the list into a single data frame
out <- out %>% bind_rows()
return(out)
}
}
yelp_first_tract <- get_yelp(ready_4_yelp[1,], "buisness") %>%
as_tibble()
## No encoding supplied: defaulting to UTF-8.
## No encoding supplied: defaulting to UTF-8.
yelp_first_tract %>% print
## # A tibble: 72 × 16
## id alias name image…¹ is_cl…² url revie…³ categ…⁴ rating coord…⁵
## <chr> <chr> <chr> <chr> <lgl> <chr> <int> <list> <dbl> <dbl>
## 1 NJv8BeJRTYY… dens… Dens… https:… FALSE http… 1462 <df> 4 33.1
## 2 d0dcwS7yajl… sauc… Sauc… https:… FALSE http… 477 <df> 4.5 33.1
## 3 p-f1iTBbupS… covi… Covi… https:… FALSE http… 258 <df> 4 33.0
## 4 1rhWjKvf8MY… bull… Bull… https:… FALSE http… 322 <df> 4 33.0
## 5 4mPHnuj9Cy1… keds… Ked'… https:… FALSE http… 297 <df> 4 33.1
## 6 TFmrYQuUDb8… lily… Lily… https:… FALSE http… 218 <df> 4.5 33.0
## 7 R4RZRviKKXB… squa… Squa… https:… FALSE http… 194 <df> 4.5 33.1
## 8 h7pf4HDYW0L… mudl… Mudl… https:… FALSE http… 237 <df> 4 33.0
## 9 0juKqFjPGm0… henr… Henr… https:… FALSE http… 306 <df> 4 33.0
## 10 AHAjkzGiZQV… omge… OMGe… https:… FALSE http… 62 <df> 5 33.1
## # … with 62 more rows, 7 more variables: coordinates$longitude <dbl>,
## # transactions <list>, price <chr>, location <df[,8]>, phone <chr>,
## # display_phone <chr>, distance <dbl>, and abbreviated variable names
## # ¹image_url, ²is_closed, ³review_count, ⁴categories, ⁵coordinates$latitude
## # ℹ Use `print(n = ...)` to see more rows, and `colnames()` to see all variable names
yelp_all_list <- vector("list", nrow(ready_4_yelp))
# Looping through all Census Tracts
for (row in 1:nrow(ready_4_yelp)){
yelp_all_list[[row]] <- suppressMessages(get_yelp(ready_4_yelp[row,], "buisness"))
if (row %% 50 == 0){
print(paste0("Current row: ", row))
}
}
## [1] "Current row: 50"
## [1] "Current row: 100"
## [1] "Current row: 150"
yelp_all <- yelp_all_list %>% bind_rows() %>% as_tibble()
yelp_all %>% print(width=1000)
## # A tibble: 17,283 × 16
## id alias
## <chr> <chr>
## 1 NJv8BeJRTYYQ9Yz2T23JaA densetsu-plano
## 2 d0dcwS7yajl_HGJf-z5NqA saucys-thai-and-pho-plano-plano
## 3 p-f1iTBbupSun6mEbsDECA covinos-pasta-and-pizza-plano
## 4 1rhWjKvf8MYEwrjbxflp1w bull-daddy-noodle-bistro-plano
## 5 4mPHnuj9Cy1wBWW1FI_ziw keds-ice-cream-waffles-and-crepes-plano
## 6 TFmrYQuUDb8p6tf-6V-Vbw lilys-cafe-plano
## 7 R4RZRviKKXBU6bJUJstkGg squares-hot-pot-plano
## 8 h7pf4HDYW0LLJuSWdUzpQw mudleaf-coffee-plano
## 9 0juKqFjPGm0G3Urk6oiyBg henrys-homemade-ice-cream-plano
## 10 AHAjkzGiZQV4UPv1CnF1Gg omgelato-plano-4
## name
## <chr>
## 1 Densetsu
## 2 Saucy's Thai & Pho - Plano
## 3 Covino's Pasta & Pizza
## 4 Bull Daddy Noodle Bistro
## 5 Ked's Ice Cream, Waffles & Crepes
## 6 Lily's Cafe
## 7 Squares Hot Pot
## 8 Mudleaf Coffee
## 9 Henry's Homemade Ice Cream
## 10 OMGelato
## image_url
## <chr>
## 1 https://s3-media4.fl.yelpcdn.com/bphoto/UNKO241hxzWv49N5dAvgtA/o.jpg
## 2 https://s3-media1.fl.yelpcdn.com/bphoto/ifJZbs408x6MkEC8Du7dew/o.jpg
## 3 https://s3-media3.fl.yelpcdn.com/bphoto/kd_fbh7Aq3LigJ_7HGnL3g/o.jpg
## 4 https://s3-media3.fl.yelpcdn.com/bphoto/zaDRBqL3OPC6v0bp9GMIrg/o.jpg
## 5 https://s3-media3.fl.yelpcdn.com/bphoto/zXyFbiBustWfvu8TlD1LfQ/o.jpg
## 6 https://s3-media3.fl.yelpcdn.com/bphoto/zmxXSG8GtQIfs9J_mgYYuQ/o.jpg
## 7 https://s3-media4.fl.yelpcdn.com/bphoto/FSXS-kj3q2hurH9jAlBZ2w/o.jpg
## 8 https://s3-media4.fl.yelpcdn.com/bphoto/XFr1iUCUQelR9qcfN7Pkng/o.jpg
## 9 https://s3-media4.fl.yelpcdn.com/bphoto/U4nC3eWsj0EyPQ28VP9xoQ/o.jpg
## 10 https://s3-media3.fl.yelpcdn.com/bphoto/ytkEp-4NLC6-6X8Kh9GNIw/o.jpg
## is_closed
## <lgl>
## 1 FALSE
## 2 FALSE
## 3 FALSE
## 4 FALSE
## 5 FALSE
## 6 FALSE
## 7 FALSE
## 8 FALSE
## 9 FALSE
## 10 FALSE
## url
## <chr>
## 1 https://www.yelp.com/biz/densetsu-plano?adjust_creative=bYB2eEjjKRvYZCw5ZxM2…
## 2 https://www.yelp.com/biz/saucys-thai-and-pho-plano-plano?adjust_creative=bYB…
## 3 https://www.yelp.com/biz/covinos-pasta-and-pizza-plano?adjust_creative=bYB2e…
## 4 https://www.yelp.com/biz/bull-daddy-noodle-bistro-plano?adjust_creative=bYB2…
## 5 https://www.yelp.com/biz/keds-ice-cream-waffles-and-crepes-plano?adjust_crea…
## 6 https://www.yelp.com/biz/lilys-cafe-plano?adjust_creative=bYB2eEjjKRvYZCw5Zx…
## 7 https://www.yelp.com/biz/squares-hot-pot-plano?adjust_creative=bYB2eEjjKRvYZ…
## 8 https://www.yelp.com/biz/mudleaf-coffee-plano?adjust_creative=bYB2eEjjKRvYZC…
## 9 https://www.yelp.com/biz/henrys-homemade-ice-cream-plano?adjust_creative=bYB…
## 10 https://www.yelp.com/biz/omgelato-plano-4?adjust_creative=bYB2eEjjKRvYZCw5Zx…
## review_count categories rating coordinates$latitude $longitude transactions
## <int> <list> <dbl> <dbl> <dbl> <list>
## 1 1462 <df [2 × 2]> 4 33.1 -96.8 <chr [2]>
## 2 477 <df [1 × 2]> 4.5 33.1 -96.8 <chr [2]>
## 3 258 <df [2 × 2]> 4 33.0 -96.8 <chr [1]>
## 4 322 <df [3 × 2]> 4 33.0 -96.8 <chr [2]>
## 5 297 <df [2 × 2]> 4 33.1 -96.8 <chr [1]>
## 6 218 <df [3 × 2]> 4.5 33.0 -96.8 <chr [2]>
## 7 194 <df [2 × 2]> 4.5 33.1 -96.8 <chr [0]>
## 8 237 <df [1 × 2]> 4 33.0 -96.8 <chr [1]>
## 9 306 <df [1 × 2]> 4 33.0 -96.8 <chr [0]>
## 10 62 <df [1 × 2]> 5 33.1 -96.8 <chr [2]>
## price location$address1 $address2 $address3 $city $zip_code $country
## <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 $$ 4152 W Spring Creek Pkwy "Ste 176" "" Plano 75024 US
## 2 $$ 4152 W Spring Creek Pkwy "" <NA> Plano 75024 US
## 3 $$ 3265 Independence Pkwy "" "" Plano 75075 US
## 4 $$ 3309 Coit Rd "Ste 100" "" Plano 75093 US
## 5 $$ 6205 Coit Rd "Ste 344" "" Plano 75024 US
## 6 $ 3100 Independence Pkwy "Ste 299" "" Plano 75075 US
## 7 $$ 4152 W Spring Creek Pkwy "Ste 108" <NA> Plano 75024 US
## 8 $$ 3100 Independence Pkwy "Ste 300" <NA> Plano 75075 US
## 9 $ 3100 Independence Pkwy "Ste 215" "" Plano 75075 US
## 10 <NA> 3829 W Springcreek Pkwy "Ste 103" "" Plano 75023 US
## $state $display_address phone display_phone distance
## <chr> <list> <chr> <chr> <dbl>
## 1 TX <chr [3]> +19729647875 (972) 964-7875 1328.
## 2 TX <chr [2]> +19723887650 (972) 388-7650 1356.
## 3 TX <chr [2]> +19725190345 (972) 519-0345 1118.
## 4 TX <chr [3]> +19725191088 (972) 519-1088 1187.
## 5 TX <chr [3]> +19726730285 (972) 673-0285 1382.
## 6 TX <chr [3]> +19723129732 (972) 312-9732 1319.
## 7 TX <chr [3]> +19725964357 (972) 596-4357 1366.
## 8 TX <chr [3]> +14699312233 (469) 931-2233 1299.
## 9 TX <chr [3]> +19726129949 (972) 612-9949 1315.
## 10 TX <chr [3]> +14693950903 (469) 395-0903 1247.
## # … with 17,273 more rows
## # ℹ Use `print(n = ...)` to see more rows
#there are a few random points from Florida, they are being removed here
yelp_all <- yelp_all_list %>% bind_rows() %>% as_tibble()
yelp_all %>% print(width=1000)
## # A tibble: 17,283 × 16
## id alias
## <chr> <chr>
## 1 NJv8BeJRTYYQ9Yz2T23JaA densetsu-plano
## 2 d0dcwS7yajl_HGJf-z5NqA saucys-thai-and-pho-plano-plano
## 3 p-f1iTBbupSun6mEbsDECA covinos-pasta-and-pizza-plano
## 4 1rhWjKvf8MYEwrjbxflp1w bull-daddy-noodle-bistro-plano
## 5 4mPHnuj9Cy1wBWW1FI_ziw keds-ice-cream-waffles-and-crepes-plano
## 6 TFmrYQuUDb8p6tf-6V-Vbw lilys-cafe-plano
## 7 R4RZRviKKXBU6bJUJstkGg squares-hot-pot-plano
## 8 h7pf4HDYW0LLJuSWdUzpQw mudleaf-coffee-plano
## 9 0juKqFjPGm0G3Urk6oiyBg henrys-homemade-ice-cream-plano
## 10 AHAjkzGiZQV4UPv1CnF1Gg omgelato-plano-4
## name
## <chr>
## 1 Densetsu
## 2 Saucy's Thai & Pho - Plano
## 3 Covino's Pasta & Pizza
## 4 Bull Daddy Noodle Bistro
## 5 Ked's Ice Cream, Waffles & Crepes
## 6 Lily's Cafe
## 7 Squares Hot Pot
## 8 Mudleaf Coffee
## 9 Henry's Homemade Ice Cream
## 10 OMGelato
## image_url
## <chr>
## 1 https://s3-media4.fl.yelpcdn.com/bphoto/UNKO241hxzWv49N5dAvgtA/o.jpg
## 2 https://s3-media1.fl.yelpcdn.com/bphoto/ifJZbs408x6MkEC8Du7dew/o.jpg
## 3 https://s3-media3.fl.yelpcdn.com/bphoto/kd_fbh7Aq3LigJ_7HGnL3g/o.jpg
## 4 https://s3-media3.fl.yelpcdn.com/bphoto/zaDRBqL3OPC6v0bp9GMIrg/o.jpg
## 5 https://s3-media3.fl.yelpcdn.com/bphoto/zXyFbiBustWfvu8TlD1LfQ/o.jpg
## 6 https://s3-media3.fl.yelpcdn.com/bphoto/zmxXSG8GtQIfs9J_mgYYuQ/o.jpg
## 7 https://s3-media4.fl.yelpcdn.com/bphoto/FSXS-kj3q2hurH9jAlBZ2w/o.jpg
## 8 https://s3-media4.fl.yelpcdn.com/bphoto/XFr1iUCUQelR9qcfN7Pkng/o.jpg
## 9 https://s3-media4.fl.yelpcdn.com/bphoto/U4nC3eWsj0EyPQ28VP9xoQ/o.jpg
## 10 https://s3-media3.fl.yelpcdn.com/bphoto/ytkEp-4NLC6-6X8Kh9GNIw/o.jpg
## is_closed
## <lgl>
## 1 FALSE
## 2 FALSE
## 3 FALSE
## 4 FALSE
## 5 FALSE
## 6 FALSE
## 7 FALSE
## 8 FALSE
## 9 FALSE
## 10 FALSE
## url
## <chr>
## 1 https://www.yelp.com/biz/densetsu-plano?adjust_creative=bYB2eEjjKRvYZCw5ZxM2…
## 2 https://www.yelp.com/biz/saucys-thai-and-pho-plano-plano?adjust_creative=bYB…
## 3 https://www.yelp.com/biz/covinos-pasta-and-pizza-plano?adjust_creative=bYB2e…
## 4 https://www.yelp.com/biz/bull-daddy-noodle-bistro-plano?adjust_creative=bYB2…
## 5 https://www.yelp.com/biz/keds-ice-cream-waffles-and-crepes-plano?adjust_crea…
## 6 https://www.yelp.com/biz/lilys-cafe-plano?adjust_creative=bYB2eEjjKRvYZCw5Zx…
## 7 https://www.yelp.com/biz/squares-hot-pot-plano?adjust_creative=bYB2eEjjKRvYZ…
## 8 https://www.yelp.com/biz/mudleaf-coffee-plano?adjust_creative=bYB2eEjjKRvYZC…
## 9 https://www.yelp.com/biz/henrys-homemade-ice-cream-plano?adjust_creative=bYB…
## 10 https://www.yelp.com/biz/omgelato-plano-4?adjust_creative=bYB2eEjjKRvYZCw5Zx…
## review_count categories rating coordinates$latitude $longitude transactions
## <int> <list> <dbl> <dbl> <dbl> <list>
## 1 1462 <df [2 × 2]> 4 33.1 -96.8 <chr [2]>
## 2 477 <df [1 × 2]> 4.5 33.1 -96.8 <chr [2]>
## 3 258 <df [2 × 2]> 4 33.0 -96.8 <chr [1]>
## 4 322 <df [3 × 2]> 4 33.0 -96.8 <chr [2]>
## 5 297 <df [2 × 2]> 4 33.1 -96.8 <chr [1]>
## 6 218 <df [3 × 2]> 4.5 33.0 -96.8 <chr [2]>
## 7 194 <df [2 × 2]> 4.5 33.1 -96.8 <chr [0]>
## 8 237 <df [1 × 2]> 4 33.0 -96.8 <chr [1]>
## 9 306 <df [1 × 2]> 4 33.0 -96.8 <chr [0]>
## 10 62 <df [1 × 2]> 5 33.1 -96.8 <chr [2]>
## price location$address1 $address2 $address3 $city $zip_code $country
## <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 $$ 4152 W Spring Creek Pkwy "Ste 176" "" Plano 75024 US
## 2 $$ 4152 W Spring Creek Pkwy "" <NA> Plano 75024 US
## 3 $$ 3265 Independence Pkwy "" "" Plano 75075 US
## 4 $$ 3309 Coit Rd "Ste 100" "" Plano 75093 US
## 5 $$ 6205 Coit Rd "Ste 344" "" Plano 75024 US
## 6 $ 3100 Independence Pkwy "Ste 299" "" Plano 75075 US
## 7 $$ 4152 W Spring Creek Pkwy "Ste 108" <NA> Plano 75024 US
## 8 $$ 3100 Independence Pkwy "Ste 300" <NA> Plano 75075 US
## 9 $ 3100 Independence Pkwy "Ste 215" "" Plano 75075 US
## 10 <NA> 3829 W Springcreek Pkwy "Ste 103" "" Plano 75023 US
## $state $display_address phone display_phone distance
## <chr> <list> <chr> <chr> <dbl>
## 1 TX <chr [3]> +19729647875 (972) 964-7875 1328.
## 2 TX <chr [2]> +19723887650 (972) 388-7650 1356.
## 3 TX <chr [2]> +19725190345 (972) 519-0345 1118.
## 4 TX <chr [3]> +19725191088 (972) 519-1088 1187.
## 5 TX <chr [3]> +19726730285 (972) 673-0285 1382.
## 6 TX <chr [3]> +19723129732 (972) 312-9732 1319.
## 7 TX <chr [3]> +19725964357 (972) 596-4357 1366.
## 8 TX <chr [3]> +14699312233 (469) 931-2233 1299.
## 9 TX <chr [3]> +19726129949 (972) 612-9949 1315.
## 10 TX <chr [3]> +14693950903 (469) 395-0903 1247.
## # … with 17,273 more rows
## # ℹ Use `print(n = ...)` to see more rows
yelp_all<-yelp_all[!(yelp_all$location$state=="FL" ),]
yelp_sf <- yelp_all %>%
mutate(x = .$coordinates$longitude,
y = .$coordinates$latitude) %>%
filter(!is.na(x) & !is.na(y)) %>%
st_as_sf(coords = c("x", "y"), crs = 4326)
# Map
tm_shape(yelp_sf) +
tm_dots(col = "review_count", style="quantile")
How many businesses are there total?
How many businesses are there for each business catagory?
#flatten categories
unnest_wider(
data = yelp_all,
col = c(yelp_all$catagories),
names_sep = NULL,
simplify = TRUE,
strict = FALSE,
names_repair = "check_unique",
ptype = NULL,
transform = NULL
)
## Warning: Unknown or uninitialised column: `catagories`.
## # A tibble: 17,275 × 16
## id alias name image…¹ is_cl…² url revie…³ categ…⁴ rating coord…⁵
## <chr> <chr> <chr> <chr> <lgl> <chr> <int> <list> <dbl> <dbl>
## 1 NJv8BeJRTYY… dens… Dens… https:… FALSE http… 1462 <df> 4 33.1
## 2 d0dcwS7yajl… sauc… Sauc… https:… FALSE http… 477 <df> 4.5 33.1
## 3 p-f1iTBbupS… covi… Covi… https:… FALSE http… 258 <df> 4 33.0
## 4 1rhWjKvf8MY… bull… Bull… https:… FALSE http… 322 <df> 4 33.0
## 5 4mPHnuj9Cy1… keds… Ked'… https:… FALSE http… 297 <df> 4 33.1
## 6 TFmrYQuUDb8… lily… Lily… https:… FALSE http… 218 <df> 4.5 33.0
## 7 R4RZRviKKXB… squa… Squa… https:… FALSE http… 194 <df> 4.5 33.1
## 8 h7pf4HDYW0L… mudl… Mudl… https:… FALSE http… 237 <df> 4 33.0
## 9 0juKqFjPGm0… henr… Henr… https:… FALSE http… 306 <df> 4 33.0
## 10 AHAjkzGiZQV… omge… OMGe… https:… FALSE http… 62 <df> 5 33.1
## # … with 17,265 more rows, 7 more variables: coordinates$longitude <dbl>,
## # transactions <list>, price <chr>, location <df[,8]>, phone <chr>,
## # display_phone <chr>, distance <dbl>, and abbreviated variable names
## # ¹image_url, ²is_closed, ³review_count, ⁴categories, ⁵coordinates$latitude
## # ℹ Use `print(n = ...)` to see more rows, and `colnames()` to see all variable names
#find parks
z = str_detect(yelp_all$categories, "park")
## Warning in stri_detect_regex(string, pattern, negate = negate, opts_regex =
## opts(pattern)): argument is not an atomic vector; coercing
#find how many parks
sum(z) #546
## [1] 546
resteraunts = 17275 - 546 #16729
Upon visual inspection, can you see any noticeable spatial patterns to the way they are distributed across the county?