library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.2.1
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6 ✔ purrr 0.3.4
## ✔ tibble 3.1.8 ✔ dplyr 1.0.9
## ✔ tidyr 1.2.0 ✔ stringr 1.4.0
## ✔ readr 2.1.2 ✔ forcats 0.5.2
## Warning: package 'ggplot2' was built under R version 4.2.1
## Warning: package 'tibble' was built under R version 4.2.1
## Warning: package 'readr' was built under R version 4.2.1
## Warning: package 'dplyr' was built under R version 4.2.1
## Warning: package 'forcats' was built under R version 4.2.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(tidyr)
library(here)
## Warning: package 'here' was built under R version 4.2.1
## here() starts at D:/Georgia Tech/Spec topic_
library(tidycensus)
## Warning: package 'tidycensus' was built under R version 4.2.1
library(sf)
## Warning: package 'sf' was built under R version 4.2.1
## Linking to GEOS 3.9.1, GDAL 3.4.3, PROJ 7.2.1; sf_use_s2() is TRUE
library(tmap)
## Warning: package 'tmap' was built under R version 4.2.1
library(jsonlite)
## Warning: package 'jsonlite' was built under R version 4.2.1
##
## Attaching package: 'jsonlite'
## The following object is masked from 'package:purrr':
##
## flatten
library(tidyverse)
library(httr)
## Warning: package 'httr' was built under R version 4.2.1
library(jsonlite)
library(reshape2)
## Warning: package 'reshape2' was built under R version 4.2.1
##
## Attaching package: 'reshape2'
## The following object is masked from 'package:tidyr':
##
## smiths
library(here)
library(yelpr)
library(knitr)
## Warning: package 'knitr' was built under R version 4.2.1
# Register the Census API key for the current session. The key is read from
# the `google_api` environment variable.
# NOTE(review): the variable name suggests a Google key -- confirm it really
# holds the Census key.
tidycensus::census_api_key(Sys.getenv("google_api"))
## To install your API key for use in future sessions, run this function with `install = TRUE`.
# `install` is an argument of census_api_key(); a standalone `install = TRUE`
# statement has no effect on that call, so none is kept here.
# Read a subset of the Yelp data we downloaded last week.
# The file sits in the project root reported by here(), so it is addressed
# relative to that root instead of through a machine-specific absolute path.
yelp_subset <- read_rds(here("yelp_all_4.rds"))
# Print to see what's inside
yelp_subset %>%
  tibble() %>%
  print(width = 1000)
## # A tibble: 39,702 × 17
## id alias name
## <chr> <chr> <chr>
## 1 PkOM7wJZzZ0DoxW84_uLDg edenvale-garden-park-san-jose Edenvale Garden Park
## 2 j-gBxd5Nkhr9iRHi_NJp7w great-oaks-park-san-jose Great Oaks Park
## 3 C41iNUrHTWYN9rUJSn_rJw chynoweth-park-san-jose Chynoweth Park
## 4 wvUCOyYuNcsc7g3OsMxWiA danna-rock-park-san-jose Danna Rock Park
## 5 sJ0RCNgqZ4nlLMl4LhKDmQ lake-cunningham-park-san-jose Lake Cunningham Park
## 6 R6Eb_p72vynnpq20aOLATg nisich-park-san-jose-2 Nisich Park
## 7 jcxoXMWrqreHQT0P5h1s9g welch-park-san-jose Welch Park
## 8 Y-tuulUyvCfDzzH4yV-WgQ hillview-park-san-jose Hillview Park
## 9 BCJx7oQ8sN55mcqMyWFnmg vieira-park-san-jose Vieira Park
## 10 DGSPySxKJJJfO6stJEF7Uw lincoln-glen-park-san-jose Lincoln Glen Park
## image_url
## <chr>
## 1 https://s3-media4.fl.yelpcdn.com/bphoto/yFTWpEtAU5xib85UvVq1pQ/o.jpg
## 2 https://s3-media4.fl.yelpcdn.com/bphoto/rcIYKSNUQ84vatIfw6_h7A/o.jpg
## 3 https://s3-media1.fl.yelpcdn.com/bphoto/gx6Ei11wytx_V215HF1gzg/o.jpg
## 4 https://s3-media3.fl.yelpcdn.com/bphoto/F0AQ22xR2eMrDiImNn66fA/o.jpg
## 5 https://s3-media3.fl.yelpcdn.com/bphoto/ZzHo_dwM5ar6CKV6TsxshQ/o.jpg
## 6 https://s3-media3.fl.yelpcdn.com/bphoto/ECTqvmIlk5LyKxKM7A-Row/o.jpg
## 7 https://s3-media4.fl.yelpcdn.com/bphoto/g5mj_1kJMixlfUYA2AgfWg/o.jpg
## 8 https://s3-media1.fl.yelpcdn.com/bphoto/p7Gg879Fu8GzS66cPeu9ig/o.jpg
## 9 https://s3-media2.fl.yelpcdn.com/bphoto/6s2pA1-_oklqy66z0EaKdQ/o.jpg
## 10 https://s3-media4.fl.yelpcdn.com/bphoto/qfk9XMXuvS_0GOHknXZL6w/o.jpg
## is_closed
## <lgl>
## 1 FALSE
## 2 FALSE
## 3 FALSE
## 4 FALSE
## 5 FALSE
## 6 FALSE
## 7 FALSE
## 8 FALSE
## 9 FALSE
## 10 FALSE
## url
## <chr>
## 1 https://www.yelp.com/biz/edenvale-garden-park-san-jose?adjust_creative=9FUT8…
## 2 https://www.yelp.com/biz/great-oaks-park-san-jose?adjust_creative=9FUT8HLBJS…
## 3 https://www.yelp.com/biz/chynoweth-park-san-jose?adjust_creative=9FUT8HLBJS3…
## 4 https://www.yelp.com/biz/danna-rock-park-san-jose?adjust_creative=9FUT8HLBJS…
## 5 https://www.yelp.com/biz/lake-cunningham-park-san-jose?adjust_creative=9FUT8…
## 6 https://www.yelp.com/biz/nisich-park-san-jose-2?adjust_creative=9FUT8HLBJS3n…
## 7 https://www.yelp.com/biz/welch-park-san-jose?adjust_creative=9FUT8HLBJS3nIJ7…
## 8 https://www.yelp.com/biz/hillview-park-san-jose?adjust_creative=9FUT8HLBJS3n…
## 9 https://www.yelp.com/biz/vieira-park-san-jose?adjust_creative=9FUT8HLBJS3nIJ…
## 10 https://www.yelp.com/biz/lincoln-glen-park-san-jose?adjust_creative=9FUT8HLB…
## review_count categories rating coordinates$latitude $longitude transactions
## <int> <list> <dbl> <dbl> <dbl> <list>
## 1 39 <df [1 × 2]> 4 37.3 -122. <list [0]>
## 2 6 <df [1 × 2]> 3.5 37.3 -122. <list [0]>
## 3 4 <df [2 × 2]> 3 37.3 -122. <list [0]>
## 4 3 <df [1 × 2]> 2.5 37.3 -122. <list [0]>
## 5 95 <df [1 × 2]> 3.5 37.3 -122. <list [0]>
## 6 4 <df [1 × 2]> 4.5 37.3 -122. <list [0]>
## 7 7 <df [1 × 2]> 3 37.3 -122. <list [0]>
## 8 2 <df [2 × 2]> 4 37.3 -122. <list [0]>
## 9 22 <df [1 × 2]> 4 37.3 -122. <list [0]>
## 10 32 <df [1 × 2]> 4 37.3 -122. <list [0]>
## location$address1 $address2 $address3 $city $zip_code $country
## <chr> <chr> <chr> <chr> <chr> <chr>
## 1 200 Edenvale Ave "" "" San Jose 95136 US
## 2 5248 Snow Dr "" "" San Jose 95111 US
## 3 Chynoweth Ave & Edenvale Ave "" "" San Jose 95136 US
## 4 4524-4534 Houndshaven Way "" "" San Jose 95111 US
## 5 2305 S White Rd "" "" San Jose 95101 US
## 6 1401-1437 Suzay Ct "" "" San Jose 95122 US
## 7 Kenesta Wy "" "" San Jose 95122 US
## 8 Adrian Way & Ocala Ave "" "" San Jose 95122 US
## 9 700 Adeline Ave "" "" San Jose 95136 US
## 10 Radio Ave & Curtner Ave "" "" San Jose 95125 US
## $state $display_address phone display_phone distance price keys
## <chr> <list> <chr> <chr> <dbl> <chr> <chr>
## 1 CA <chr [2]> "" "" 526. <NA> p
## 2 CA <chr [2]> "+14087935510" "(408) 793-5510" 909. <NA> p
## 3 CA <chr [2]> "+14087935510" "(408) 793-5510" 683. <NA> p
## 4 CA <chr [2]> "" "" 1082. <NA> p
## 5 CA <chr [2]> "+14087935510" "(408) 793-5510" 1346. <NA> p
## 6 CA <chr [2]> "" "" 1804. <NA> p
## 7 CA <chr [2]> "" "" 689. <NA> p
## 8 CA <chr [2]> "+14087935510" "(408) 793-5510" 850. <NA> p
## 9 CA <chr [2]> "" "" 1063. <NA> p
## 10 CA <chr [2]> "+14085353570" "(408) 535-3570" 1553. <NA> p
## # … with 39,692 more rows
# `coordinates` is a nested data frame column; peek at its first rows.
head(yelp_subset$coordinates)
## latitude longitude
## 1 37.26251 -121.8203
## 2 37.26807 -121.8057
## 3 37.25949 -121.8188
## 4 37.27216 -121.8234
## 5 37.33745 -121.8089
## 6 37.31825 -121.8339
# Flatten nested data-frame columns (e.g. coordinates, location) into
# top-level "parent.child" columns.
yelp_flat <- as_tibble(jsonlite::flatten(yelp_subset))
# After flattening there is no `coordinates` column any more, so this is NULL.
head(yelp_flat$coordinates)
## Warning: Unknown or uninitialised column: `coordinates`.
## NULL
# Collapse each list-column entry into one comma-separated string.
yelp_concat <- yelp_flat %>%
  mutate(
    transactions = map_chr(transactions, ~ str_c(.x, collapse = ", ")),
    location.display_address = map_chr(
      location.display_address,
      ~ str_c(.x, collapse = ", ")
    )
  )
# Custom function that takes the data frame in the "categories" column of the
# Yelp data and returns a single string.
#
# x: a data frame with columns "alias" and "title" (one element of
#    Yelp$categories).
# Returns a length-one character vector with the titles joined by ", ".
# An input without any titles gives "", and a missing title gives NA, so the
# result is always safe to use inside map_chr().
concate_list <- function(x) {
  titles <- x[["title"]]
  # Propagate missingness instead of pasting the literal text "NA".
  if (anyNA(titles)) {
    return(NA_character_)
  }
  # paste() with `collapse` returns exactly one string, even for zero titles.
  paste(titles, collapse = ", ")
}
# Replace the nested category data frames with their concatenated titles,
# then show every column of the result.
yelp_flat2 <- mutate(
  yelp_concat,
  categories = map_chr(categories, concate_list)
)
print(yelp_flat2, width = 1000)
## # A tibble: 39,702 × 25
## id alias name
## <chr> <chr> <chr>
## 1 PkOM7wJZzZ0DoxW84_uLDg edenvale-garden-park-san-jose Edenvale Garden Park
## 2 j-gBxd5Nkhr9iRHi_NJp7w great-oaks-park-san-jose Great Oaks Park
## 3 C41iNUrHTWYN9rUJSn_rJw chynoweth-park-san-jose Chynoweth Park
## 4 wvUCOyYuNcsc7g3OsMxWiA danna-rock-park-san-jose Danna Rock Park
## 5 sJ0RCNgqZ4nlLMl4LhKDmQ lake-cunningham-park-san-jose Lake Cunningham Park
## 6 R6Eb_p72vynnpq20aOLATg nisich-park-san-jose-2 Nisich Park
## 7 jcxoXMWrqreHQT0P5h1s9g welch-park-san-jose Welch Park
## 8 Y-tuulUyvCfDzzH4yV-WgQ hillview-park-san-jose Hillview Park
## 9 BCJx7oQ8sN55mcqMyWFnmg vieira-park-san-jose Vieira Park
## 10 DGSPySxKJJJfO6stJEF7Uw lincoln-glen-park-san-jose Lincoln Glen Park
## image_url
## <chr>
## 1 https://s3-media4.fl.yelpcdn.com/bphoto/yFTWpEtAU5xib85UvVq1pQ/o.jpg
## 2 https://s3-media4.fl.yelpcdn.com/bphoto/rcIYKSNUQ84vatIfw6_h7A/o.jpg
## 3 https://s3-media1.fl.yelpcdn.com/bphoto/gx6Ei11wytx_V215HF1gzg/o.jpg
## 4 https://s3-media3.fl.yelpcdn.com/bphoto/F0AQ22xR2eMrDiImNn66fA/o.jpg
## 5 https://s3-media3.fl.yelpcdn.com/bphoto/ZzHo_dwM5ar6CKV6TsxshQ/o.jpg
## 6 https://s3-media3.fl.yelpcdn.com/bphoto/ECTqvmIlk5LyKxKM7A-Row/o.jpg
## 7 https://s3-media4.fl.yelpcdn.com/bphoto/g5mj_1kJMixlfUYA2AgfWg/o.jpg
## 8 https://s3-media1.fl.yelpcdn.com/bphoto/p7Gg879Fu8GzS66cPeu9ig/o.jpg
## 9 https://s3-media2.fl.yelpcdn.com/bphoto/6s2pA1-_oklqy66z0EaKdQ/o.jpg
## 10 https://s3-media4.fl.yelpcdn.com/bphoto/qfk9XMXuvS_0GOHknXZL6w/o.jpg
## is_closed
## <lgl>
## 1 FALSE
## 2 FALSE
## 3 FALSE
## 4 FALSE
## 5 FALSE
## 6 FALSE
## 7 FALSE
## 8 FALSE
## 9 FALSE
## 10 FALSE
## url
## <chr>
## 1 https://www.yelp.com/biz/edenvale-garden-park-san-jose?adjust_creative=9FUT8…
## 2 https://www.yelp.com/biz/great-oaks-park-san-jose?adjust_creative=9FUT8HLBJS…
## 3 https://www.yelp.com/biz/chynoweth-park-san-jose?adjust_creative=9FUT8HLBJS3…
## 4 https://www.yelp.com/biz/danna-rock-park-san-jose?adjust_creative=9FUT8HLBJS…
## 5 https://www.yelp.com/biz/lake-cunningham-park-san-jose?adjust_creative=9FUT8…
## 6 https://www.yelp.com/biz/nisich-park-san-jose-2?adjust_creative=9FUT8HLBJS3n…
## 7 https://www.yelp.com/biz/welch-park-san-jose?adjust_creative=9FUT8HLBJS3nIJ7…
## 8 https://www.yelp.com/biz/hillview-park-san-jose?adjust_creative=9FUT8HLBJS3n…
## 9 https://www.yelp.com/biz/vieira-park-san-jose?adjust_creative=9FUT8HLBJS3nIJ…
## 10 https://www.yelp.com/biz/lincoln-glen-park-san-jose?adjust_creative=9FUT8HLB…
## review_count categories rating transactions phone
## <int> <chr> <dbl> <chr> <chr>
## 1 39 Parks 4 "" ""
## 2 6 Parks 3.5 "" "+14087935510"
## 3 4 Parks, Playgrounds 3 "" "+14087935510"
## 4 3 Parks 2.5 "" ""
## 5 95 Parks 3.5 "" "+14087935510"
## 6 4 Parks 4.5 "" ""
## 7 7 Parks 3 "" ""
## 8 2 Parks, Playgrounds 4 "" "+14087935510"
## 9 22 Parks 4 "" ""
## 10 32 Parks 4 "" "+14085353570"
## display_phone distance price keys coordinates.latitude
## <chr> <dbl> <chr> <chr> <dbl>
## 1 "" 526. <NA> p 37.3
## 2 "(408) 793-5510" 909. <NA> p 37.3
## 3 "(408) 793-5510" 683. <NA> p 37.3
## 4 "" 1082. <NA> p 37.3
## 5 "(408) 793-5510" 1346. <NA> p 37.3
## 6 "" 1804. <NA> p 37.3
## 7 "" 689. <NA> p 37.3
## 8 "(408) 793-5510" 850. <NA> p 37.3
## 9 "" 1063. <NA> p 37.3
## 10 "(408) 535-3570" 1553. <NA> p 37.3
## coordinates.longitude location.address1 location.address2
## <dbl> <chr> <chr>
## 1 -122. 200 Edenvale Ave ""
## 2 -122. 5248 Snow Dr ""
## 3 -122. Chynoweth Ave & Edenvale Ave ""
## 4 -122. 4524-4534 Houndshaven Way ""
## 5 -122. 2305 S White Rd ""
## 6 -122. 1401-1437 Suzay Ct ""
## 7 -122. Kenesta Wy ""
## 8 -122. Adrian Way & Ocala Ave ""
## 9 -122. 700 Adeline Ave ""
## 10 -122. Radio Ave & Curtner Ave ""
## location.address3 location.city location.zip_code location.country
## <chr> <chr> <chr> <chr>
## 1 "" San Jose 95136 US
## 2 "" San Jose 95111 US
## 3 "" San Jose 95136 US
## 4 "" San Jose 95111 US
## 5 "" San Jose 95101 US
## 6 "" San Jose 95122 US
## 7 "" San Jose 95122 US
## 8 "" San Jose 95122 US
## 9 "" San Jose 95136 US
## 10 "" San Jose 95125 US
## location.state location.display_address
## <chr> <chr>
## 1 CA 200 Edenvale Ave, San Jose, CA 95136
## 2 CA 5248 Snow Dr, San Jose, CA 95111
## 3 CA Chynoweth Ave & Edenvale Ave, San Jose, CA 95136
## 4 CA 4524-4534 Houndshaven Way, San Jose, CA 95111
## 5 CA 2305 S White Rd, San Jose, CA 95101
## 6 CA 1401-1437 Suzay Ct, San Jose, CA 95122
## 7 CA Kenesta Wy, San Jose, CA 95122
## 8 CA Adrian Way & Ocala Ave, San Jose, CA 95122
## 9 CA 700 Adeline Ave, San Jose, CA 95136
## 10 CA Radio Ave & Curtner Ave, San Jose, CA 95125
## # … with 39,692 more rows
# Read the full data.
# The file sits in the project root reported by here(), so it is addressed
# relative to that root instead of through a machine-specific absolute path.
my_yelp <- read_rds(here("yelp_all_4.rds"))
# Issue 2 ------------------------------
# Keep one row per business id; the raw download repeats businesses.
yelp_unique <- my_yelp %>%
  distinct(id, .keep_all = TRUE)
# This step removes duplicated ids (not NAs), so the message says so.
glue::glue("Before dropping duplicates, there were {nrow(my_yelp)} rows. After dropping them, there are {nrow(yelp_unique)} rows") %>%
  print()
## Before dropping duplicates, there were 39702 rows. After dropping them, there are 5551 rows
# Issue 3 ------------------------------
# Flatten the nested data-frame columns, then collapse every list-column into
# a single comma-separated string per business.
yelp_flat <- yelp_unique %>%
  jsonlite::flatten() %>%
  mutate(
    transactions = map_chr(transactions, ~ str_c(.x, collapse = ", ")),
    location.display_address = map_chr(
      location.display_address,
      ~ str_c(.x, collapse = ", ")
    ),
    categories = map_chr(categories, concate_list) # concate_list is the custom function
  )
# Issue 4 ------------------------------
# Count the missing values in every column.
map_dbl(yelp_flat, ~ sum(is.na(.x)))
## id alias name
## 0 0 0
## image_url is_closed url
## 0 0 0
## review_count categories rating
## 0 0 0
## transactions phone display_phone
## 0 0 0
## distance price keys
## 0 1650 0
## coordinates.latitude coordinates.longitude location.address1
## 2 2 73
## location.address2 location.address3 location.city
## 1146 1327 0
## location.zip_code location.country location.state
## 0 0 0
## location.display_address
## 0
# First, verify that the missing values in the latitude and longitude columns
# fall in the same rows (each column has 2 NAs in the counts printed above).
identical(is.na(yelp_flat$coordinates.latitude),
          is.na(yelp_flat$coordinates.longitude)) # TRUE means the NA rows coincide.
## [1] TRUE
# Drop rows lacking either coordinate. Naming both columns keeps this step
# correct even if the two NA patterns ever differ.
yelp_dropna1 <- yelp_flat %>%
  drop_na(coordinates.longitude, coordinates.latitude)
# Remove businesses that have no price level recorded, then preview the result.
yelp_dropna2 <- drop_na(yelp_dropna1, price)
head(yelp_dropna2)
## id alias
## 1 cToVW-dCXcxhqFQX_xSc-w orchard-heritage-park-sunnyvale
## 2 Qbuy0qvdlXaXKDOpaDnCBg pho-bowl-san-jose-4
## 3 pyCLzdHoqs3ndmGV1vPamA mingles-mango-san-jose-2
## 4 nm7YhFGOMAw1ojcX32RmpQ tacos-el-pollo-y-el-pollito-san-jose
## 5 Iw0u4il3SpX7rtY0ookQdA puro-michoacan-restaurant-san-jose-2
## 6 WE1O8_MfVq4kM4G1OVAP7g trines-cafe-6-san-jose
## name
## 1 Orchard Heritage Park
## 2 Pho Bowl
## 3 Mingle's Mango
## 4 Tacos El Pollo Y El Pollito
## 5 Puro Michoacan Restaurant
## 6 Trines Cafe 6
## image_url
## 1 https://s3-media2.fl.yelpcdn.com/bphoto/HSOJ1H2XNrLBhlATnPHoKg/o.jpg
## 2 https://s3-media2.fl.yelpcdn.com/bphoto/OwRnJ_0kxA_glrocLu1spg/o.jpg
## 3 https://s3-media3.fl.yelpcdn.com/bphoto/QZSPNHxnVyu9KBwDEqbkbw/o.jpg
## 4 https://s3-media2.fl.yelpcdn.com/bphoto/2OcE1RSGx8MEFtfj566Hew/o.jpg
## 5 https://s3-media2.fl.yelpcdn.com/bphoto/tPx6AVzj0Xh7SnKt1jalEQ/o.jpg
## 6 https://s3-media2.fl.yelpcdn.com/bphoto/KFiqnd1ETApqoPgSq_0Ebg/o.jpg
## is_closed
## 1 FALSE
## 2 FALSE
## 3 FALSE
## 4 FALSE
## 5 FALSE
## 6 FALSE
## url
## 1 https://www.yelp.com/biz/orchard-heritage-park-sunnyvale?adjust_creative=9FUT8HLBJS3nIJ7hDk8tZw&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=9FUT8HLBJS3nIJ7hDk8tZw
## 2 https://www.yelp.com/biz/pho-bowl-san-jose-4?adjust_creative=9FUT8HLBJS3nIJ7hDk8tZw&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=9FUT8HLBJS3nIJ7hDk8tZw
## 3 https://www.yelp.com/biz/mingles-mango-san-jose-2?adjust_creative=9FUT8HLBJS3nIJ7hDk8tZw&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=9FUT8HLBJS3nIJ7hDk8tZw
## 4 https://www.yelp.com/biz/tacos-el-pollo-y-el-pollito-san-jose?adjust_creative=9FUT8HLBJS3nIJ7hDk8tZw&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=9FUT8HLBJS3nIJ7hDk8tZw
## 5 https://www.yelp.com/biz/puro-michoacan-restaurant-san-jose-2?adjust_creative=9FUT8HLBJS3nIJ7hDk8tZw&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=9FUT8HLBJS3nIJ7hDk8tZw
## 6 https://www.yelp.com/biz/trines-cafe-6-san-jose?adjust_creative=9FUT8HLBJS3nIJ7hDk8tZw&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=9FUT8HLBJS3nIJ7hDk8tZw
## review_count categories rating transactions
## 1 8 Museums, Parks, Gift Shops 5
## 2 382 Vietnamese, Soup, Noodles 4 delivery
## 3 362 Asian Fusion, Dim Sum, Noodles 4 pickup, delivery
## 4 65 Mexican, Food Trucks 4 delivery
## 5 38 Mexican 4
## 6 68 Mexican 4 delivery
## phone display_phone distance price keys coordinates.latitude
## 1 +14087490220 (408) 749-0220 1531.2446 $$ p 37.35776
## 2 +14082818288 (408) 281-8288 881.1513 $ a 37.25979
## 3 +14083001425 (408) 300-1425 778.2162 $$ a 37.26058
## 4 +14085092940 (408) 509-2940 702.4299 $ a 37.26053
## 5 +16692347944 (669) 234-7944 262.5733 $$ a 37.26297
## 6 +14082243243 (408) 224-3243 889.1281 $$ a 37.25960
## coordinates.longitude location.address1 location.address2 location.address3
## 1 -122.0268 560 E Remington Dr <NA>
## 2 -121.8077 5316 Monterey Hwy
## 3 -121.8083 Monterey Rd D Ste D
## 4 -121.8096 5270 Monterey Hwy
## 5 -121.8136 5138 Monterey Hwy Ste A <NA>
## 6 -121.8078 5304 Monterey Hwy
## location.city location.zip_code location.country location.state
## 1 Sunnyvale 94087 US CA
## 2 San Jose 95111 US CA
## 3 San Jose 95111 US CA
## 4 San Jose 95111 US CA
## 5 San Jose 95111 US CA
## 6 San Jose 95111 US CA
## location.display_address
## 1 560 E Remington Dr, Sunnyvale, CA 94087
## 2 5316 Monterey Hwy, San Jose, CA 95111
## 3 Monterey Rd D, Ste D, San Jose, CA 95111
## 4 5270 Monterey Hwy, San Jose, CA 95111
## 5 5138 Monterey Hwy, Ste A, San Jose, CA 95111
## 6 5304 Monterey Hwy, San Jose, CA 95111
# NOTE(review): price is a character column, so summary() reports only
# length/class/mode; this also inspects yelp_dropna1, not yelp_dropna2.
yelp_dropna1$price %>% summary()
## Length Class Mode
## 5549 character character
# census boundary
#census_a <- st_read("https://raw.githubusercontent.com/BonwooKoo/UrbanAnalytics2022/main/Lab/module_0/testdata.geojson")
# Download tract-level ACS estimates, with geometry, for Santa Clara County, CA.
census_1 <- suppressMessages(
  get_acs(
    geography = "tract", # or "block group", "county", "state" etc.
    state = "CA",
    county = "Santa Clara",
    variables = c(
      hhincome = "B19019_001",
      race.tot = "B02001_001",
      race.white = "B02001_002",
      race.black = "B02001_003"
    ),
    year = 2019,
    survey = "acs5", # American Community Survey 5-year estimate
    geometry = TRUE, # returns sf objects
    output = "wide" # wide vs. long
  )
)
##
|
| | 0%
|
|= | 1%
|
|= | 2%
|
|== | 2%
|
|== | 3%
|
|=== | 4%
|
|=== | 5%
|
|==== | 5%
|
|==== | 6%
|
|===== | 7%
|
|===== | 8%
|
|====== | 8%
|
|====== | 9%
|
|======= | 9%
|
|======= | 10%
|
|======== | 11%
|
|======== | 12%
|
|========= | 13%
|
|========== | 14%
|
|========== | 15%
|
|=========== | 15%
|
|=========== | 16%
|
|============ | 17%
|
|============ | 18%
|
|============= | 18%
|
|============= | 19%
|
|============== | 19%
|
|============== | 20%
|
|=============== | 21%
|
|=============== | 22%
|
|================ | 23%
|
|================= | 24%
|
|================= | 25%
|
|================== | 26%
|
|=================== | 27%
|
|=================== | 28%
|
|==================== | 28%
|
|==================== | 29%
|
|===================== | 30%
|
|====================== | 31%
|
|====================== | 32%
|
|======================= | 33%
|
|======================== | 34%
|
|======================== | 35%
|
|========================= | 36%
|
|========================== | 37%
|
|========================== | 38%
|
|=========================== | 38%
|
|=========================== | 39%
|
|============================ | 40%
|
|============================= | 41%
|
|============================= | 42%
|
|============================== | 43%
|
|=============================== | 44%
|
|=============================== | 45%
|
|================================ | 45%
|
|================================ | 46%
|
|================================= | 47%
|
|================================= | 48%
|
|================================== | 48%
|
|================================== | 49%
|
|=================================== | 50%
|
|==================================== | 51%
|
|==================================== | 52%
|
|===================================== | 53%
|
|====================================== | 54%
|
|====================================== | 55%
|
|======================================= | 56%
|
|======================================== | 57%
|
|======================================== | 58%
|
|========================================= | 58%
|
|========================================= | 59%
|
|========================================== | 60%
|
|=========================================== | 61%
|
|=========================================== | 62%
|
|============================================ | 63%
|
|============================================= | 64%
|
|============================================= | 65%
|
|============================================== | 66%
|
|=============================================== | 67%
|
|=============================================== | 68%
|
|================================================ | 68%
|
|================================================ | 69%
|
|================================================= | 70%
|
|================================================== | 71%
|
|================================================== | 72%
|
|=================================================== | 73%
|
|==================================================== | 74%
|
|==================================================== | 75%
|
|===================================================== | 75%
|
|===================================================== | 76%
|
|====================================================== | 77%
|
|====================================================== | 78%
|
|======================================================= | 78%
|
|======================================================= | 79%
|
|======================================================== | 80%
|
|======================================================== | 81%
|
|========================================================= | 81%
|
|========================================================= | 82%
|
|========================================================== | 83%
|
|=========================================================== | 84%
|
|=========================================================== | 85%
|
|============================================================ | 86%
|
|============================================================= | 87%
|
|============================================================= | 88%
|
|============================================================== | 88%
|
|============================================================== | 89%
|
|=============================================================== | 90%
|
|================================================================ | 91%
|
|================================================================ | 92%
|
|================================================================= | 93%
|
|================================================================== | 94%
|
|================================================================== | 95%
|
|=================================================================== | 96%
|
|==================================================================== | 97%
|
|==================================================================== | 98%
|
|===================================================================== | 98%
|
|===================================================================== | 99%
|
|======================================================================| 100%
# Split NAME ("tract, county, state") into three columns. Splitting on a bare
# comma leaves a leading space on the county and state values.
census_1_s <- census_1 %>%
  separate(col = NAME, into = c("tract", "county", "state"), sep = ",")
head(census_1_s)
## Simple feature collection with 6 features and 12 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: -121.8832 ymin: 37.2602 xmax: -121.7828 ymax: 37.34074
## Geodetic CRS: NAD83
## GEOID tract county state hhincomeE
## 1 06085512042 Census Tract 5120.42 Santa Clara County California 78382
## 2 06085503306 Census Tract 5033.06 Santa Clara County California 87361
## 3 06085503108 Census Tract 5031.08 Santa Clara County California 111618
## 4 06085503323 Census Tract 5033.23 Santa Clara County California 135877
## 5 06085503401 Census Tract 5034.01 Santa Clara County California 79318
## 6 06085512020 Census Tract 5120.20 Santa Clara County California 102401
## hhincomeM race.totE race.totM race.whiteE race.whiteM race.blackE race.blackM
## 1 3798 3020 292 878 173 151 79
## 2 14909 4373 379 1349 443 86 71
## 3 23929 8222 627 3301 619 637 211
## 4 14938 4763 402 1308 458 251 155
## 5 17404 4468 431 1202 366 75 65
## 6 10813 7825 879 4039 902 167 106
## geometry
## 1 MULTIPOLYGON (((-121.82 37....
## 2 MULTIPOLYGON (((-121.8318 3...
## 3 MULTIPOLYGON (((-121.8832 3...
## 4 MULTIPOLYGON (((-121.8006 3...
## 5 MULTIPOLYGON (((-121.8455 3...
## 6 MULTIPOLYGON (((-121.8419 3...
# Turn the cleaned Yelp table into point features (lon/lat, EPSG:4326).
# NOTE(review): this uses yelp_dropna1, so rows with NA price are still
# included; yelp_dropna2 is not used here -- confirm that is intended.
yelp_sf <- yelp_dropna1 %>%
  st_as_sf(coords = c("coordinates.longitude", "coordinates.latitude"), crs = 4326)
# sf subsets
# Keep only the points that intersect the dissolved Santa Clara County tracts.
# trimws() makes the county match independent of the whitespace that
# separate() leaves around the split pieces of NAME.
yelp_in <- yelp_sf[census_1_s %>%
                     st_transform(4326) %>%
                     filter(trimws(county) %in% "Santa Clara County") %>%
                     st_union(), , op = st_intersects]
nrow(yelp_in)
## [1] 5291
# Report how the row and column counts changed from the raw data to the
# county-clipped points.
print(
  glue::glue("nrow before: {nrow(my_yelp)} -> nrow after: {nrow(yelp_in)} \n
ncol before: {ncol(my_yelp)} -> ncol after: {ncol(yelp_in)} \n")
)
## nrow before: 39702 -> nrow after: 5291
##
## ncol before: 17 -> ncol after: 24
# Visualize: interactive map of the clipped businesses, coloured by price.
tmap_mode(mode = "view")
## tmap mode set to interactive viewing
tm_shape(shp = yelp_in) +
  tm_dots(col = "price")
# Make sure census is an sf object and put it in the same CRS as the Yelp
# points (EPSG:4326). The tracts arrive in NAD83, so they are reprojected with
# st_transform(); assigning through `st_crs<-` would only relabel the
# coordinates without converting them.
census_sf <- census_1 %>%
  st_sf() %>%
  st_transform(4326)
# Spatial join
# Polygon-side join: tract rows (and tract geometry) are repeated for each
# intersecting business.
census_yelp <- census_sf %>%
  st_join(yelp_in, join = st_intersects) #%>% st_transform(4326)
# Point-side join: each business keeps its point geometry and gains the
# attributes of the tract it intersects.
yelp_census <- yelp_in %>%
  st_join(census_sf, join = st_intersects)
# View
head(census_yelp)
## Simple feature collection with 6 features and 33 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: -121.8318 ymin: 37.2602 xmax: -121.8083 ymax: 37.34074
## Geodetic CRS: WGS 84
## GEOID NAME hhincomeE
## 1 06085512042 Census Tract 5120.42, Santa Clara County, California 78382
## 2 06085503306 Census Tract 5033.06, Santa Clara County, California 87361
## 2.1 06085503306 Census Tract 5033.06, Santa Clara County, California 87361
## 2.2 06085503306 Census Tract 5033.06, Santa Clara County, California 87361
## 2.3 06085503306 Census Tract 5033.06, Santa Clara County, California 87361
## 2.4 06085503306 Census Tract 5033.06, Santa Clara County, California 87361
## hhincomeM race.totE race.totM race.whiteE race.whiteM race.blackE
## 1 3798 3020 292 878 173 151
## 2 14909 4373 379 1349 443 86
## 2.1 14909 4373 379 1349 443 86
## 2.2 14909 4373 379 1349 443 86
## 2.3 14909 4373 379 1349 443 86
## 2.4 14909 4373 379 1349 443 86
## race.blackM id
## 1 79 Iw0u4il3SpX7rtY0ookQdA
## 2 71 jcxoXMWrqreHQT0P5h1s9g
## 2.1 71 lZUd_rDdO5FyZQZfKjGqZw
## 2.2 71 4XaESbCqIsmETYxpPiv9Lg
## 2.3 71 NlYprq0SB1tNdgWv1GpXdA
## 2.4 71 smFno58h21Rd4RihK87xvw
## alias
## 1 puro-michoacan-restaurant-san-jose-2
## 2 welch-park-san-jose
## 2.1 cha-ca-long-phung-san-jose-2
## 2.2 mexican-style-churros-san-jose-2
## 2.3 que-ta-banh-canh-trang-bang-udon-noodle-soup-san-jose
## 2.4 taste-of-persia-san-jose
## name
## 1 Puro Michoacan Restaurant
## 2 Welch Park
## 2.1 Cha Ca Long Phung
## 2.2 Mexican Style Churros
## 2.3 Que Ta Banh Canh Trang Bang - Udon Noodle Soup
## 2.4 Taste of Persia
## image_url
## 1 https://s3-media2.fl.yelpcdn.com/bphoto/tPx6AVzj0Xh7SnKt1jalEQ/o.jpg
## 2 https://s3-media4.fl.yelpcdn.com/bphoto/g5mj_1kJMixlfUYA2AgfWg/o.jpg
## 2.1 https://s3-media1.fl.yelpcdn.com/bphoto/HF_-0eyrZQ9_-lKVjNpUyg/o.jpg
## 2.2 https://s3-media3.fl.yelpcdn.com/bphoto/L4IDuy_D2wpBwiSC0P6qzw/o.jpg
## 2.3 https://s3-media1.fl.yelpcdn.com/bphoto/ykTpTBey8xmauZpnv3s22A/o.jpg
## 2.4 https://s3-media2.fl.yelpcdn.com/bphoto/6g5XM0Pgi3qIYv9PnkAESw/o.jpg
## is_closed
## 1 FALSE
## 2 FALSE
## 2.1 FALSE
## 2.2 FALSE
## 2.3 FALSE
## 2.4 FALSE
## url
## 1 https://www.yelp.com/biz/puro-michoacan-restaurant-san-jose-2?adjust_creative=9FUT8HLBJS3nIJ7hDk8tZw&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=9FUT8HLBJS3nIJ7hDk8tZw
## 2 https://www.yelp.com/biz/welch-park-san-jose?adjust_creative=9FUT8HLBJS3nIJ7hDk8tZw&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=9FUT8HLBJS3nIJ7hDk8tZw
## 2.1 https://www.yelp.com/biz/cha-ca-long-phung-san-jose-2?adjust_creative=9FUT8HLBJS3nIJ7hDk8tZw&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=9FUT8HLBJS3nIJ7hDk8tZw
## 2.2 https://www.yelp.com/biz/mexican-style-churros-san-jose-2?adjust_creative=9FUT8HLBJS3nIJ7hDk8tZw&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=9FUT8HLBJS3nIJ7hDk8tZw
## 2.3 https://www.yelp.com/biz/que-ta-banh-canh-trang-bang-udon-noodle-soup-san-jose?adjust_creative=9FUT8HLBJS3nIJ7hDk8tZw&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=9FUT8HLBJS3nIJ7hDk8tZw
## 2.4 https://www.yelp.com/biz/taste-of-persia-san-jose?adjust_creative=9FUT8HLBJS3nIJ7hDk8tZw&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=9FUT8HLBJS3nIJ7hDk8tZw
## review_count categories rating transactions
## 1 38 Mexican 4.0
## 2 7 Parks 3.0
## 2.1 200 Vietnamese 4.0
## 2.2 52 Desserts, Mexican 5.0
## 2.3 192 Vietnamese, Seafood, Noodles 3.5 pickup, delivery
## 2.4 82 Mediterranean, Persian/Iranian 4.0 pickup, delivery
## phone display_phone distance price keys location.address1
## 1 +16692347944 (669) 234-7944 262.5733 $$ a 5138 Monterey Hwy
## 2 689.0162 <NA> p Kenesta Wy
## 2.1 +14082549941 (408) 254-9941 533.5555 $ a 2145 Tully Rd
## 2.2 +14085129594 (408) 512-9594 757.5121 $ a 1812 Cunningham Ave
## 2.3 +14082591445 (408) 259-1445 603.5719 $$ a 2005 Tully Rd
## 2.4 +14084935978 (408) 493-5978 597.6317 $$ a 2011 Tully Rd
## location.address2 location.address3 location.city location.zip_code
## 1 Ste A <NA> San Jose 95111
## 2 San Jose 95122
## 2.1 San Jose 95122
## 2.2 <NA> San Jose 95122
## 2.3 <NA> San Jose 95122
## 2.4 <NA> San Jose 95122
## location.country location.state
## 1 US CA
## 2 US CA
## 2.1 US CA
## 2.2 US CA
## 2.3 US CA
## 2.4 US CA
## location.display_address geometry
## 1 5138 Monterey Hwy, Ste A, San Jose, CA 95111 MULTIPOLYGON (((-121.82 37....
## 2 Kenesta Wy, San Jose, CA 95122 MULTIPOLYGON (((-121.8318 3...
## 2.1 2145 Tully Rd, San Jose, CA 95122 MULTIPOLYGON (((-121.8318 3...
## 2.2 1812 Cunningham Ave, San Jose, CA 95122 MULTIPOLYGON (((-121.8318 3...
## 2.3 2005 Tully Rd, San Jose, CA 95122 MULTIPOLYGON (((-121.8318 3...
## 2.4 2011 Tully Rd, San Jose, CA 95122 MULTIPOLYGON (((-121.8318 3...
# Preview the point-side join result.
head(yelp_census)
## Simple feature collection with 6 features and 33 fields
## Geometry type: POINT
## Dimension: XY
## Bounding box: xmin: -121.8339 ymin: 37.25949 xmax: -121.8057 ymax: 37.33745
## Geodetic CRS: WGS 84
## id alias name
## 1 PkOM7wJZzZ0DoxW84_uLDg edenvale-garden-park-san-jose Edenvale Garden Park
## 2 j-gBxd5Nkhr9iRHi_NJp7w great-oaks-park-san-jose Great Oaks Park
## 3 C41iNUrHTWYN9rUJSn_rJw chynoweth-park-san-jose Chynoweth Park
## 4 wvUCOyYuNcsc7g3OsMxWiA danna-rock-park-san-jose Danna Rock Park
## 5 sJ0RCNgqZ4nlLMl4LhKDmQ lake-cunningham-park-san-jose Lake Cunningham Park
## 6 R6Eb_p72vynnpq20aOLATg nisich-park-san-jose-2 Nisich Park
## image_url
## 1 https://s3-media4.fl.yelpcdn.com/bphoto/yFTWpEtAU5xib85UvVq1pQ/o.jpg
## 2 https://s3-media4.fl.yelpcdn.com/bphoto/rcIYKSNUQ84vatIfw6_h7A/o.jpg
## 3 https://s3-media1.fl.yelpcdn.com/bphoto/gx6Ei11wytx_V215HF1gzg/o.jpg
## 4 https://s3-media3.fl.yelpcdn.com/bphoto/F0AQ22xR2eMrDiImNn66fA/o.jpg
## 5 https://s3-media3.fl.yelpcdn.com/bphoto/ZzHo_dwM5ar6CKV6TsxshQ/o.jpg
## 6 https://s3-media3.fl.yelpcdn.com/bphoto/ECTqvmIlk5LyKxKM7A-Row/o.jpg
## is_closed
## 1 FALSE
## 2 FALSE
## 3 FALSE
## 4 FALSE
## 5 FALSE
## 6 FALSE
## url
## 1 https://www.yelp.com/biz/edenvale-garden-park-san-jose?adjust_creative=9FUT8HLBJS3nIJ7hDk8tZw&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=9FUT8HLBJS3nIJ7hDk8tZw
## 2 https://www.yelp.com/biz/great-oaks-park-san-jose?adjust_creative=9FUT8HLBJS3nIJ7hDk8tZw&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=9FUT8HLBJS3nIJ7hDk8tZw
## 3 https://www.yelp.com/biz/chynoweth-park-san-jose?adjust_creative=9FUT8HLBJS3nIJ7hDk8tZw&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=9FUT8HLBJS3nIJ7hDk8tZw
## 4 https://www.yelp.com/biz/danna-rock-park-san-jose?adjust_creative=9FUT8HLBJS3nIJ7hDk8tZw&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=9FUT8HLBJS3nIJ7hDk8tZw
## 5 https://www.yelp.com/biz/lake-cunningham-park-san-jose?adjust_creative=9FUT8HLBJS3nIJ7hDk8tZw&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=9FUT8HLBJS3nIJ7hDk8tZw
## 6 https://www.yelp.com/biz/nisich-park-san-jose-2?adjust_creative=9FUT8HLBJS3nIJ7hDk8tZw&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=9FUT8HLBJS3nIJ7hDk8tZw
## review_count categories rating transactions phone
## 1 39 Parks 4.0
## 2 6 Parks 3.5 +14087935510
## 3 4 Parks, Playgrounds 3.0 +14087935510
## 4 3 Parks 2.5
## 5 95 Parks 3.5 +14087935510
## 6 4 Parks 4.5
## display_phone distance price keys location.address1
## 1 526.0228 <NA> p 200 Edenvale Ave
## 2 (408) 793-5510 909.4110 <NA> p 5248 Snow Dr
## 3 (408) 793-5510 682.7830 <NA> p Chynoweth Ave & Edenvale Ave
## 4 1082.4227 <NA> p 4524-4534 Houndshaven Way
## 5 (408) 793-5510 1346.1879 <NA> p 2305 S White Rd
## 6 1803.6135 <NA> p 1401-1437 Suzay Ct
## location.address2 location.address3 location.city location.zip_code
## 1 San Jose 95136
## 2 San Jose 95111
## 3 San Jose 95136
## 4 San Jose 95111
## 5 San Jose 95101
## 6 San Jose 95122
## location.country location.state
## 1 US CA
## 2 US CA
## 3 US CA
## 4 US CA
## 5 US CA
## 6 US CA
## location.display_address GEOID
## 1 200 Edenvale Ave, San Jose, CA 95136 06085512021
## 2 5248 Snow Dr, San Jose, CA 95111 06085512017
## 3 Chynoweth Ave & Edenvale Ave, San Jose, CA 95136 06085512021
## 4 4524-4534 Houndshaven Way, San Jose, CA 95111 06085512017
## 5 2305 S White Rd, San Jose, CA 95101 06085503321
## 6 1401-1437 Suzay Ct, San Jose, CA 95122 06085503111
## NAME hhincomeE hhincomeM
## 1 Census Tract 5120.21, Santa Clara County, California 130460 7700
## 2 Census Tract 5120.17, Santa Clara County, California 92855 17920
## 3 Census Tract 5120.21, Santa Clara County, California 130460 7700
## 4 Census Tract 5120.17, Santa Clara County, California 92855 17920
## 5 Census Tract 5033.21, Santa Clara County, California 156667 28278
## 6 Census Tract 5031.11, Santa Clara County, California 99423 18943
## race.totE race.totM race.whiteE race.whiteM race.blackE race.blackM
## 1 6491 502 3331 469 275 225
## 2 7565 781 3316 634 196 175
## 3 6491 502 3331 469 275 225
## 4 7565 781 3316 634 196 175
## 5 4690 355 419 182 44 49
## 6 5132 483 1167 444 17 27
## geometry
## 1 POINT (-121.8203 37.26251)
## 2 POINT (-121.8057 37.26807)
## 3 POINT (-121.8188 37.25949)
## 4 POINT (-121.8234 37.27216)
## 5 POINT (-121.8089 37.33745)
## 6 POINT (-121.8339 37.31825)
# Choropleth of the mean Yelp rating per GEOID: collapse census_yelp to one
# row per GEOID, then shade the polygons by quantile class of that mean.
tm_shape(
  census_yelp %>%
    group_by(GEOID) %>%
    summarise(rating = mean(rating))
) +
  tm_polygons(
    col = "rating",
    style = "quantile"
  )
# Dot map of the Yelp points, coloured by the hhincomeE column
# (presumably the ACS household-income estimate -- confirm against the
# get_acs() call that created it).
tm_shape(shp = yelp_census) +
  tm_dots(col = "hhincomeE")
# Recode review_count into a two-level label and preview the result.
yelp_in %>%
  # The label is a brand-new column, so it is added with mutate():
  # more than 1000 reviews -> "many", otherwise -> "few" (NA stays NA).
  mutate(
    review_count_binary = if_else(review_count > 1000, "many", "few")
  ) %>%
  # Keep just the source column and its recode so the print-out stays short.
  select(review_count, review_count_binary) %>%
  head()
## Simple feature collection with 6 features and 2 fields
## Geometry type: POINT
## Dimension: XY
## Bounding box: xmin: -121.8339 ymin: 37.25949 xmax: -121.8057 ymax: 37.33745
## Geodetic CRS: WGS 84
## review_count review_count_binary geometry
## 1 39 few POINT (-121.8203 37.26251)
## 2 6 few POINT (-121.8057 37.26807)
## 3 4 few POINT (-121.8188 37.25949)
## 4 3 few POINT (-121.8234 37.27216)
## 5 95 few POINT (-121.8089 37.33745)
## 6 4 few POINT (-121.8339 37.31825)
# Standardise every numeric column with scale() (centre and divide by the
# standard deviation by default), then keep only those numeric columns.
# Predicate functions such as is.numeric must be wrapped in where() when used
# for tidyselect column selection; passing the bare predicate is deprecated
# and triggers a "Predicate functions must be wrapped in `where()`" warning.
yelp_in %>%
  mutate(across(where(is.numeric), scale)) %>%
  select(where(is.numeric))
## Warning: Predicate functions must be wrapped in `where()`.
##
## # Bad
## data %>% select(is.numeric)
##
## # Good
## data %>% select(where(is.numeric))
##
## ℹ Please update your code.
## This message is displayed once per session.
## Simple feature collection with 5291 features and 3 fields
## Geometry type: POINT
## Dimension: XY
## Bounding box: xmin: -122.1847 ymin: 36.95046 xmax: -121.52 ymax: 37.45962
## Geodetic CRS: WGS 84
## First 10 features:
## review_count rating distance geometry
## 1 -0.5120726 0.4815183 -0.50796146 POINT (-121.8203 37.26251)
## 2 -0.5844259 -0.1124927 -0.32989489 POINT (-121.8057 37.26807)
## 3 -0.5888110 -0.7065038 -0.43515339 POINT (-121.8188 37.25949)
## 4 -0.5910035 -1.3005149 -0.24953872 POINT (-121.8234 37.27216)
## 5 -0.3892912 -0.1124927 -0.12703162 POINT (-121.8089 37.33745)
## 6 -0.5888110 1.0755294 0.08542202 POINT (-121.8339 37.31825)
## 7 -0.5822334 -0.7065038 -0.43225838 POINT (-121.8239 37.3255)
## 8 -0.5931960 0.4815183 -0.35756898 POINT (-121.8254 37.33876)
## 9 -0.5493455 0.4815183 -0.25870205 POINT (-121.8614 37.28697)
## 10 -0.5274203 0.4815183 -0.03095143 POINT (-121.8896 37.28915)