library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.2.1
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6     ✔ purrr   0.3.4
## ✔ tibble  3.1.8     ✔ dplyr   1.0.9
## ✔ tidyr   1.2.0     ✔ stringr 1.4.0
## ✔ readr   2.1.2     ✔ forcats 0.5.2
## Warning: package 'ggplot2' was built under R version 4.2.1
## Warning: package 'tibble' was built under R version 4.2.1
## Warning: package 'readr' was built under R version 4.2.1
## Warning: package 'dplyr' was built under R version 4.2.1
## Warning: package 'forcats' was built under R version 4.2.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(tidyr)
library(here)
## Warning: package 'here' was built under R version 4.2.1
## here() starts at D:/Georgia Tech/Spec topic_
library(tidycensus)
## Warning: package 'tidycensus' was built under R version 4.2.1
library(sf)
## Warning: package 'sf' was built under R version 4.2.1
## Linking to GEOS 3.9.1, GDAL 3.4.3, PROJ 7.2.1; sf_use_s2() is TRUE
library(tmap)
## Warning: package 'tmap' was built under R version 4.2.1
library(jsonlite)
## Warning: package 'jsonlite' was built under R version 4.2.1
## 
## Attaching package: 'jsonlite'
## The following object is masked from 'package:purrr':
## 
##     flatten
library(tidyverse)
library(httr)
## Warning: package 'httr' was built under R version 4.2.1
library(jsonlite)
library(reshape2)
## Warning: package 'reshape2' was built under R version 4.2.1
## 
## Attaching package: 'reshape2'
## The following object is masked from 'package:tidyr':
## 
##     smiths
library(here)
library(yelpr)
library(knitr)
## Warning: package 'knitr' was built under R version 4.2.1
# Register the Census API key for the current R session only. The key is read
# from the `google_api` environment variable.
# NOTE(review): the variable name suggests a Google key -- confirm that it
# really stores the Census API key.
# A bare `install = TRUE` statement used to follow this call. It only created
# an unused variable called `install` and never reached census_api_key(), so
# it has been removed. To persist the key, pass `install = TRUE` as an
# argument to census_api_key() instead.
tidycensus::census_api_key(Sys.getenv("google_api"))
## To install your API key for use in future sessions, run this function with `install = TRUE`.
# Read the Yelp data we downloaded last week.
# here() already resolves paths against the project root (reported at load
# time as "D:/Georgia Tech/Spec topic_"), so only the file name is given.
# This keeps the script runnable when the project folder is moved or cloned.
yelp_subset <- read_rds(here("yelp_all_4.rds"))
# Print every column to see what's inside
yelp_subset %>%
  as_tibble() %>%
  print(width = 1000)
## # A tibble: 39,702 × 17
##    id                     alias                         name                
##    <chr>                  <chr>                         <chr>               
##  1 PkOM7wJZzZ0DoxW84_uLDg edenvale-garden-park-san-jose Edenvale Garden Park
##  2 j-gBxd5Nkhr9iRHi_NJp7w great-oaks-park-san-jose      Great Oaks Park     
##  3 C41iNUrHTWYN9rUJSn_rJw chynoweth-park-san-jose       Chynoweth Park      
##  4 wvUCOyYuNcsc7g3OsMxWiA danna-rock-park-san-jose      Danna Rock Park     
##  5 sJ0RCNgqZ4nlLMl4LhKDmQ lake-cunningham-park-san-jose Lake Cunningham Park
##  6 R6Eb_p72vynnpq20aOLATg nisich-park-san-jose-2        Nisich Park         
##  7 jcxoXMWrqreHQT0P5h1s9g welch-park-san-jose           Welch Park          
##  8 Y-tuulUyvCfDzzH4yV-WgQ hillview-park-san-jose        Hillview Park       
##  9 BCJx7oQ8sN55mcqMyWFnmg vieira-park-san-jose          Vieira Park         
## 10 DGSPySxKJJJfO6stJEF7Uw lincoln-glen-park-san-jose    Lincoln Glen Park   
##    image_url                                                           
##    <chr>                                                               
##  1 https://s3-media4.fl.yelpcdn.com/bphoto/yFTWpEtAU5xib85UvVq1pQ/o.jpg
##  2 https://s3-media4.fl.yelpcdn.com/bphoto/rcIYKSNUQ84vatIfw6_h7A/o.jpg
##  3 https://s3-media1.fl.yelpcdn.com/bphoto/gx6Ei11wytx_V215HF1gzg/o.jpg
##  4 https://s3-media3.fl.yelpcdn.com/bphoto/F0AQ22xR2eMrDiImNn66fA/o.jpg
##  5 https://s3-media3.fl.yelpcdn.com/bphoto/ZzHo_dwM5ar6CKV6TsxshQ/o.jpg
##  6 https://s3-media3.fl.yelpcdn.com/bphoto/ECTqvmIlk5LyKxKM7A-Row/o.jpg
##  7 https://s3-media4.fl.yelpcdn.com/bphoto/g5mj_1kJMixlfUYA2AgfWg/o.jpg
##  8 https://s3-media1.fl.yelpcdn.com/bphoto/p7Gg879Fu8GzS66cPeu9ig/o.jpg
##  9 https://s3-media2.fl.yelpcdn.com/bphoto/6s2pA1-_oklqy66z0EaKdQ/o.jpg
## 10 https://s3-media4.fl.yelpcdn.com/bphoto/qfk9XMXuvS_0GOHknXZL6w/o.jpg
##    is_closed
##    <lgl>    
##  1 FALSE    
##  2 FALSE    
##  3 FALSE    
##  4 FALSE    
##  5 FALSE    
##  6 FALSE    
##  7 FALSE    
##  8 FALSE    
##  9 FALSE    
## 10 FALSE    
##    url                                                                          
##    <chr>                                                                        
##  1 https://www.yelp.com/biz/edenvale-garden-park-san-jose?adjust_creative=9FUT8…
##  2 https://www.yelp.com/biz/great-oaks-park-san-jose?adjust_creative=9FUT8HLBJS…
##  3 https://www.yelp.com/biz/chynoweth-park-san-jose?adjust_creative=9FUT8HLBJS3…
##  4 https://www.yelp.com/biz/danna-rock-park-san-jose?adjust_creative=9FUT8HLBJS…
##  5 https://www.yelp.com/biz/lake-cunningham-park-san-jose?adjust_creative=9FUT8…
##  6 https://www.yelp.com/biz/nisich-park-san-jose-2?adjust_creative=9FUT8HLBJS3n…
##  7 https://www.yelp.com/biz/welch-park-san-jose?adjust_creative=9FUT8HLBJS3nIJ7…
##  8 https://www.yelp.com/biz/hillview-park-san-jose?adjust_creative=9FUT8HLBJS3n…
##  9 https://www.yelp.com/biz/vieira-park-san-jose?adjust_creative=9FUT8HLBJS3nIJ…
## 10 https://www.yelp.com/biz/lincoln-glen-park-san-jose?adjust_creative=9FUT8HLB…
##    review_count categories   rating coordinates$latitude $longitude transactions
##           <int> <list>        <dbl>                <dbl>      <dbl> <list>      
##  1           39 <df [1 × 2]>    4                   37.3      -122. <list [0]>  
##  2            6 <df [1 × 2]>    3.5                 37.3      -122. <list [0]>  
##  3            4 <df [2 × 2]>    3                   37.3      -122. <list [0]>  
##  4            3 <df [1 × 2]>    2.5                 37.3      -122. <list [0]>  
##  5           95 <df [1 × 2]>    3.5                 37.3      -122. <list [0]>  
##  6            4 <df [1 × 2]>    4.5                 37.3      -122. <list [0]>  
##  7            7 <df [1 × 2]>    3                   37.3      -122. <list [0]>  
##  8            2 <df [2 × 2]>    4                   37.3      -122. <list [0]>  
##  9           22 <df [1 × 2]>    4                   37.3      -122. <list [0]>  
## 10           32 <df [1 × 2]>    4                   37.3      -122. <list [0]>  
##    location$address1            $address2 $address3 $city    $zip_code $country
##    <chr>                        <chr>     <chr>     <chr>    <chr>     <chr>   
##  1 200 Edenvale Ave             ""        ""        San Jose 95136     US      
##  2 5248 Snow Dr                 ""        ""        San Jose 95111     US      
##  3 Chynoweth Ave & Edenvale Ave ""        ""        San Jose 95136     US      
##  4 4524-4534 Houndshaven Way    ""        ""        San Jose 95111     US      
##  5 2305 S White Rd              ""        ""        San Jose 95101     US      
##  6 1401-1437 Suzay Ct           ""        ""        San Jose 95122     US      
##  7 Kenesta Wy                   ""        ""        San Jose 95122     US      
##  8 Adrian Way & Ocala Ave       ""        ""        San Jose 95122     US      
##  9 700 Adeline Ave              ""        ""        San Jose 95136     US      
## 10 Radio Ave & Curtner Ave      ""        ""        San Jose 95125     US      
##    $state $display_address phone          display_phone    distance price keys 
##    <chr>  <list>           <chr>          <chr>               <dbl> <chr> <chr>
##  1 CA     <chr [2]>        ""             ""                   526. <NA>  p    
##  2 CA     <chr [2]>        "+14087935510" "(408) 793-5510"     909. <NA>  p    
##  3 CA     <chr [2]>        "+14087935510" "(408) 793-5510"     683. <NA>  p    
##  4 CA     <chr [2]>        ""             ""                  1082. <NA>  p    
##  5 CA     <chr [2]>        "+14087935510" "(408) 793-5510"    1346. <NA>  p    
##  6 CA     <chr [2]>        ""             ""                  1804. <NA>  p    
##  7 CA     <chr [2]>        ""             ""                   689. <NA>  p    
##  8 CA     <chr [2]>        "+14087935510" "(408) 793-5510"     850. <NA>  p    
##  9 CA     <chr [2]>        ""             ""                  1063. <NA>  p    
## 10 CA     <chr [2]>        "+14085353570" "(408) 535-3570"    1553. <NA>  p    
## # … with 39,692 more rows
# `coordinates` is a nested data frame column; show its first rows.
head(yelp_subset$coordinates)
##   latitude longitude
## 1 37.26251 -121.8203
## 2 37.26807 -121.8057
## 3 37.25949 -121.8188
## 4 37.27216 -121.8234
## 5 37.33745 -121.8089
## 6 37.31825 -121.8339
# Flatten the nested data-frame columns into ordinary top-level columns
# (e.g. `coordinates.latitude`) and convert the result to a tibble.
yelp_flat <- as_tibble(jsonlite::flatten(yelp_subset))

# After flattening there is no `coordinates` column any more, so this lookup
# warns and yields NULL.
head(yelp_flat$coordinates)
## Warning: Unknown or uninitialised column: `coordinates`.
## NULL
# Collapse each list-column entry into one comma-separated string so that
# `transactions` and `location.display_address` become character columns.
yelp_concat <- mutate(
  yelp_flat,
  transactions = map_chr(transactions, str_c, collapse = ", "),
  location.display_address = map_chr(
    location.display_address, str_c,
    collapse = ", "
  )
)
# Helper for the Yelp "categories" list-column.
#
# x: one element of that column, i.e. a data frame with the columns "alias"
#    and "title".
# Returns a single string holding the category titles joined by ", ".
concate_list <- function(x) {
  str_c(x[["title"]], collapse = ", ")
}

# Replace each nested category data frame with its comma-separated titles.
yelp_flat2 <- mutate(
  yelp_concat,
  categories = map_chr(categories, concate_list)
)

# Show all columns of the fully flattened table.
print(yelp_flat2, width = 1000)
## # A tibble: 39,702 × 25
##    id                     alias                         name                
##    <chr>                  <chr>                         <chr>               
##  1 PkOM7wJZzZ0DoxW84_uLDg edenvale-garden-park-san-jose Edenvale Garden Park
##  2 j-gBxd5Nkhr9iRHi_NJp7w great-oaks-park-san-jose      Great Oaks Park     
##  3 C41iNUrHTWYN9rUJSn_rJw chynoweth-park-san-jose       Chynoweth Park      
##  4 wvUCOyYuNcsc7g3OsMxWiA danna-rock-park-san-jose      Danna Rock Park     
##  5 sJ0RCNgqZ4nlLMl4LhKDmQ lake-cunningham-park-san-jose Lake Cunningham Park
##  6 R6Eb_p72vynnpq20aOLATg nisich-park-san-jose-2        Nisich Park         
##  7 jcxoXMWrqreHQT0P5h1s9g welch-park-san-jose           Welch Park          
##  8 Y-tuulUyvCfDzzH4yV-WgQ hillview-park-san-jose        Hillview Park       
##  9 BCJx7oQ8sN55mcqMyWFnmg vieira-park-san-jose          Vieira Park         
## 10 DGSPySxKJJJfO6stJEF7Uw lincoln-glen-park-san-jose    Lincoln Glen Park   
##    image_url                                                           
##    <chr>                                                               
##  1 https://s3-media4.fl.yelpcdn.com/bphoto/yFTWpEtAU5xib85UvVq1pQ/o.jpg
##  2 https://s3-media4.fl.yelpcdn.com/bphoto/rcIYKSNUQ84vatIfw6_h7A/o.jpg
##  3 https://s3-media1.fl.yelpcdn.com/bphoto/gx6Ei11wytx_V215HF1gzg/o.jpg
##  4 https://s3-media3.fl.yelpcdn.com/bphoto/F0AQ22xR2eMrDiImNn66fA/o.jpg
##  5 https://s3-media3.fl.yelpcdn.com/bphoto/ZzHo_dwM5ar6CKV6TsxshQ/o.jpg
##  6 https://s3-media3.fl.yelpcdn.com/bphoto/ECTqvmIlk5LyKxKM7A-Row/o.jpg
##  7 https://s3-media4.fl.yelpcdn.com/bphoto/g5mj_1kJMixlfUYA2AgfWg/o.jpg
##  8 https://s3-media1.fl.yelpcdn.com/bphoto/p7Gg879Fu8GzS66cPeu9ig/o.jpg
##  9 https://s3-media2.fl.yelpcdn.com/bphoto/6s2pA1-_oklqy66z0EaKdQ/o.jpg
## 10 https://s3-media4.fl.yelpcdn.com/bphoto/qfk9XMXuvS_0GOHknXZL6w/o.jpg
##    is_closed
##    <lgl>    
##  1 FALSE    
##  2 FALSE    
##  3 FALSE    
##  4 FALSE    
##  5 FALSE    
##  6 FALSE    
##  7 FALSE    
##  8 FALSE    
##  9 FALSE    
## 10 FALSE    
##    url                                                                          
##    <chr>                                                                        
##  1 https://www.yelp.com/biz/edenvale-garden-park-san-jose?adjust_creative=9FUT8…
##  2 https://www.yelp.com/biz/great-oaks-park-san-jose?adjust_creative=9FUT8HLBJS…
##  3 https://www.yelp.com/biz/chynoweth-park-san-jose?adjust_creative=9FUT8HLBJS3…
##  4 https://www.yelp.com/biz/danna-rock-park-san-jose?adjust_creative=9FUT8HLBJS…
##  5 https://www.yelp.com/biz/lake-cunningham-park-san-jose?adjust_creative=9FUT8…
##  6 https://www.yelp.com/biz/nisich-park-san-jose-2?adjust_creative=9FUT8HLBJS3n…
##  7 https://www.yelp.com/biz/welch-park-san-jose?adjust_creative=9FUT8HLBJS3nIJ7…
##  8 https://www.yelp.com/biz/hillview-park-san-jose?adjust_creative=9FUT8HLBJS3n…
##  9 https://www.yelp.com/biz/vieira-park-san-jose?adjust_creative=9FUT8HLBJS3nIJ…
## 10 https://www.yelp.com/biz/lincoln-glen-park-san-jose?adjust_creative=9FUT8HLB…
##    review_count categories         rating transactions phone         
##           <int> <chr>               <dbl> <chr>        <chr>         
##  1           39 Parks                 4   ""           ""            
##  2            6 Parks                 3.5 ""           "+14087935510"
##  3            4 Parks, Playgrounds    3   ""           "+14087935510"
##  4            3 Parks                 2.5 ""           ""            
##  5           95 Parks                 3.5 ""           "+14087935510"
##  6            4 Parks                 4.5 ""           ""            
##  7            7 Parks                 3   ""           ""            
##  8            2 Parks, Playgrounds    4   ""           "+14087935510"
##  9           22 Parks                 4   ""           ""            
## 10           32 Parks                 4   ""           "+14085353570"
##    display_phone    distance price keys  coordinates.latitude
##    <chr>               <dbl> <chr> <chr>                <dbl>
##  1 ""                   526. <NA>  p                     37.3
##  2 "(408) 793-5510"     909. <NA>  p                     37.3
##  3 "(408) 793-5510"     683. <NA>  p                     37.3
##  4 ""                  1082. <NA>  p                     37.3
##  5 "(408) 793-5510"    1346. <NA>  p                     37.3
##  6 ""                  1804. <NA>  p                     37.3
##  7 ""                   689. <NA>  p                     37.3
##  8 "(408) 793-5510"     850. <NA>  p                     37.3
##  9 ""                  1063. <NA>  p                     37.3
## 10 "(408) 535-3570"    1553. <NA>  p                     37.3
##    coordinates.longitude location.address1            location.address2
##                    <dbl> <chr>                        <chr>            
##  1                 -122. 200 Edenvale Ave             ""               
##  2                 -122. 5248 Snow Dr                 ""               
##  3                 -122. Chynoweth Ave & Edenvale Ave ""               
##  4                 -122. 4524-4534 Houndshaven Way    ""               
##  5                 -122. 2305 S White Rd              ""               
##  6                 -122. 1401-1437 Suzay Ct           ""               
##  7                 -122. Kenesta Wy                   ""               
##  8                 -122. Adrian Way & Ocala Ave       ""               
##  9                 -122. 700 Adeline Ave              ""               
## 10                 -122. Radio Ave & Curtner Ave      ""               
##    location.address3 location.city location.zip_code location.country
##    <chr>             <chr>         <chr>             <chr>           
##  1 ""                San Jose      95136             US              
##  2 ""                San Jose      95111             US              
##  3 ""                San Jose      95136             US              
##  4 ""                San Jose      95111             US              
##  5 ""                San Jose      95101             US              
##  6 ""                San Jose      95122             US              
##  7 ""                San Jose      95122             US              
##  8 ""                San Jose      95122             US              
##  9 ""                San Jose      95136             US              
## 10 ""                San Jose      95125             US              
##    location.state location.display_address                        
##    <chr>          <chr>                                           
##  1 CA             200 Edenvale Ave, San Jose, CA 95136            
##  2 CA             5248 Snow Dr, San Jose, CA 95111                
##  3 CA             Chynoweth Ave & Edenvale Ave, San Jose, CA 95136
##  4 CA             4524-4534 Houndshaven Way, San Jose, CA 95111   
##  5 CA             2305 S White Rd, San Jose, CA 95101             
##  6 CA             1401-1437 Suzay Ct, San Jose, CA 95122          
##  7 CA             Kenesta Wy, San Jose, CA 95122                  
##  8 CA             Adrian Way & Ocala Ave, San Jose, CA 95122      
##  9 CA             700 Adeline Ave, San Jose, CA 95136             
## 10 CA             Radio Ave & Curtner Ave, San Jose, CA 95125     
## # … with 39,692 more rows
# Read the full data.
# here() already resolves paths against the project root, so only the file
# name is given instead of a machine-specific absolute path.
my_yelp <- read_rds(here("yelp_all_4.rds"))

# Issue 2 ------------------------------
# Keep one row per business id; the remaining columns come from the first
# occurrence of each id.
yelp_unique <- my_yelp %>%
  distinct(id, .keep_all = TRUE)

# Report how many rows the de-duplication removed. This step drops duplicate
# ids, not NAs, so the message says so.
glue::glue(
  "Before removing duplicates, there were {nrow(my_yelp)} rows. ",
  "After removing them, there are {nrow(yelp_unique)} rows"
) %>%
  print()
## Before removing duplicates, there were 39702 rows. After removing them, there are 5551 rows
# Issue 3 ------------------------------
# Step 1: flatten the nested data-frame columns.
# Step 2: turn every list-column into a comma-separated character column;
#         `categories` goes through the custom concate_list() helper.
yelp_flat <- mutate(
  jsonlite::flatten(yelp_unique),
  transactions = map_chr(transactions, str_c, collapse = ", "),
  location.display_address = map_chr(
    location.display_address, str_c,
    collapse = ", "
  ),
  categories = map_chr(categories, concate_list)
)
# Issue 4 ------------------------------
# Count the missing values in every column (named numeric vector).
vapply(yelp_flat, function(column) sum(is.na(column)), numeric(1))
##                       id                    alias                     name 
##                        0                        0                        0 
##                image_url                is_closed                      url 
##                        0                        0                        0 
##             review_count               categories                   rating 
##                        0                        0                        0 
##             transactions                    phone            display_phone 
##                        0                        0                        0 
##                 distance                    price                     keys 
##                        0                     1650                        0 
##     coordinates.latitude    coordinates.longitude        location.address1 
##                        2                        2                       73 
##        location.address2        location.address3            location.city 
##                     1146                     1327                        0 
##        location.zip_code         location.country           location.state 
##                        0                        0                        0 
## location.display_address 
##                        0
# First, verify that the missing latitude and longitude values (2 each in the
# NA counts above) sit in exactly the same rows. TRUE means they do.
with(
  yelp_flat,
  identical(is.na(coordinates.latitude), is.na(coordinates.longitude))
)
## [1] TRUE
# Remove the rows that have no coordinates. Checking longitude alone is
# enough because latitude is missing in the same rows.
yelp_dropna1 <- drop_na(yelp_flat, coordinates.longitude)

# Then remove the rows that have no price level.
yelp_dropna2 <- drop_na(yelp_dropna1, price)
head(yelp_dropna2)
##                       id                                alias
## 1 cToVW-dCXcxhqFQX_xSc-w      orchard-heritage-park-sunnyvale
## 2 Qbuy0qvdlXaXKDOpaDnCBg                  pho-bowl-san-jose-4
## 3 pyCLzdHoqs3ndmGV1vPamA             mingles-mango-san-jose-2
## 4 nm7YhFGOMAw1ojcX32RmpQ tacos-el-pollo-y-el-pollito-san-jose
## 5 Iw0u4il3SpX7rtY0ookQdA puro-michoacan-restaurant-san-jose-2
## 6 WE1O8_MfVq4kM4G1OVAP7g               trines-cafe-6-san-jose
##                          name
## 1       Orchard Heritage Park
## 2                    Pho Bowl
## 3              Mingle's Mango
## 4 Tacos El Pollo Y El Pollito
## 5   Puro Michoacan Restaurant
## 6               Trines Cafe 6
##                                                              image_url
## 1 https://s3-media2.fl.yelpcdn.com/bphoto/HSOJ1H2XNrLBhlATnPHoKg/o.jpg
## 2 https://s3-media2.fl.yelpcdn.com/bphoto/OwRnJ_0kxA_glrocLu1spg/o.jpg
## 3 https://s3-media3.fl.yelpcdn.com/bphoto/QZSPNHxnVyu9KBwDEqbkbw/o.jpg
## 4 https://s3-media2.fl.yelpcdn.com/bphoto/2OcE1RSGx8MEFtfj566Hew/o.jpg
## 5 https://s3-media2.fl.yelpcdn.com/bphoto/tPx6AVzj0Xh7SnKt1jalEQ/o.jpg
## 6 https://s3-media2.fl.yelpcdn.com/bphoto/KFiqnd1ETApqoPgSq_0Ebg/o.jpg
##   is_closed
## 1     FALSE
## 2     FALSE
## 3     FALSE
## 4     FALSE
## 5     FALSE
## 6     FALSE
##                                                                                                                                                                                                 url
## 1      https://www.yelp.com/biz/orchard-heritage-park-sunnyvale?adjust_creative=9FUT8HLBJS3nIJ7hDk8tZw&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=9FUT8HLBJS3nIJ7hDk8tZw
## 2                  https://www.yelp.com/biz/pho-bowl-san-jose-4?adjust_creative=9FUT8HLBJS3nIJ7hDk8tZw&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=9FUT8HLBJS3nIJ7hDk8tZw
## 3             https://www.yelp.com/biz/mingles-mango-san-jose-2?adjust_creative=9FUT8HLBJS3nIJ7hDk8tZw&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=9FUT8HLBJS3nIJ7hDk8tZw
## 4 https://www.yelp.com/biz/tacos-el-pollo-y-el-pollito-san-jose?adjust_creative=9FUT8HLBJS3nIJ7hDk8tZw&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=9FUT8HLBJS3nIJ7hDk8tZw
## 5 https://www.yelp.com/biz/puro-michoacan-restaurant-san-jose-2?adjust_creative=9FUT8HLBJS3nIJ7hDk8tZw&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=9FUT8HLBJS3nIJ7hDk8tZw
## 6               https://www.yelp.com/biz/trines-cafe-6-san-jose?adjust_creative=9FUT8HLBJS3nIJ7hDk8tZw&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=9FUT8HLBJS3nIJ7hDk8tZw
##   review_count                     categories rating     transactions
## 1            8     Museums, Parks, Gift Shops      5                 
## 2          382      Vietnamese, Soup, Noodles      4         delivery
## 3          362 Asian Fusion, Dim Sum, Noodles      4 pickup, delivery
## 4           65           Mexican, Food Trucks      4         delivery
## 5           38                        Mexican      4                 
## 6           68                        Mexican      4         delivery
##          phone  display_phone  distance price keys coordinates.latitude
## 1 +14087490220 (408) 749-0220 1531.2446    $$    p             37.35776
## 2 +14082818288 (408) 281-8288  881.1513     $    a             37.25979
## 3 +14083001425 (408) 300-1425  778.2162    $$    a             37.26058
## 4 +14085092940 (408) 509-2940  702.4299     $    a             37.26053
## 5 +16692347944 (669) 234-7944  262.5733    $$    a             37.26297
## 6 +14082243243 (408) 224-3243  889.1281    $$    a             37.25960
##   coordinates.longitude  location.address1 location.address2 location.address3
## 1             -122.0268 560 E Remington Dr                                <NA>
## 2             -121.8077  5316 Monterey Hwy                                    
## 3             -121.8083      Monterey Rd D             Ste D                  
## 4             -121.8096  5270 Monterey Hwy                                    
## 5             -121.8136  5138 Monterey Hwy             Ste A              <NA>
## 6             -121.8078  5304 Monterey Hwy                                    
##   location.city location.zip_code location.country location.state
## 1     Sunnyvale             94087               US             CA
## 2      San Jose             95111               US             CA
## 3      San Jose             95111               US             CA
## 4      San Jose             95111               US             CA
## 5      San Jose             95111               US             CA
## 6      San Jose             95111               US             CA
##                       location.display_address
## 1      560 E Remington Dr, Sunnyvale, CA 94087
## 2        5316 Monterey Hwy, San Jose, CA 95111
## 3     Monterey Rd D, Ste D, San Jose, CA 95111
## 4        5270 Monterey Hwy, San Jose, CA 95111
## 5 5138 Monterey Hwy, Ste A, San Jose, CA 95111
## 6        5304 Monterey Hwy, San Jose, CA 95111
# Summarise the price column (character, so only length/class/mode are shown).
# NOTE(review): this inspects yelp_dropna1, i.e. the table from before the
# price NAs were dropped -- confirm whether yelp_dropna2 was intended.
summary(yelp_dropna1[["price"]])
##    Length     Class      Mode 
##      5549 character character
# census boundary
#census_a <- st_read("https://raw.githubusercontent.com/BonwooKoo/UrbanAnalytics2022/main/Lab/module_0/testdata.geojson") 
# Download tract-level 2019 ACS 5-year estimates for Santa Clara County, CA,
# together with the tract geometries (an sf object in wide format).
# suppressMessages() hides tidycensus' status messages but not the download
# progress bar, which otherwise floods the rendered output; the bar is turned
# off with `progress_bar = FALSE`, which get_acs() forwards through `...` to
# the geometry download.
census_1 <- suppressMessages(
  get_acs(
    geography = "tract", # or "block group", "county", "state" etc.
    state = "CA",
    county = "Santa Clara",
    variables = c(
      hhincome = "B19019_001",
      race.tot = "B02001_001",
      race.white = "B02001_002",
      race.black = "B02001_003"
    ),
    year = 2019,
    survey = "acs5", # American Community Survey 5-year estimate
    geometry = TRUE, # returns sf objects
    output = "wide", # wide vs. long
    progress_bar = FALSE # no download progress bar in the knitted report
  )
)
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |=                                                                     |   1%
  |                                                                            
  |=                                                                     |   2%
  |                                                                            
  |==                                                                    |   2%
  |                                                                            
  |==                                                                    |   3%
  |                                                                            
  |===                                                                   |   4%
  |                                                                            
  |===                                                                   |   5%
  |                                                                            
  |====                                                                  |   5%
  |                                                                            
  |====                                                                  |   6%
  |                                                                            
  |=====                                                                 |   7%
  |                                                                            
  |=====                                                                 |   8%
  |                                                                            
  |======                                                                |   8%
  |                                                                            
  |======                                                                |   9%
  |                                                                            
  |=======                                                               |   9%
  |                                                                            
  |=======                                                               |  10%
  |                                                                            
  |========                                                              |  11%
  |                                                                            
  |========                                                              |  12%
  |                                                                            
  |=========                                                             |  12%
  |                                                                            
  |=========                                                             |  13%
  |                                                                            
  |==========                                                            |  14%
  |                                                                            
  |==========                                                            |  15%
  |                                                                            
  |===========                                                           |  15%
  |                                                                            
  |===========                                                           |  16%
  |                                                                            
  |============                                                          |  17%
  |                                                                            
  |============                                                          |  18%
  |                                                                            
  |=============                                                         |  18%
  |                                                                            
  |=============                                                         |  19%
  |                                                                            
  |==============                                                        |  19%
  |                                                                            
  |==============                                                        |  20%
  |                                                                            
  |===============                                                       |  21%
  |                                                                            
  |===============                                                       |  22%
  |                                                                            
  |================                                                      |  22%
  |                                                                            
  |================                                                      |  23%
  |                                                                            
  |=================                                                     |  24%
  |                                                                            
  |=================                                                     |  25%
  |                                                                            
  |==================                                                    |  26%
  |                                                                            
  |===================                                                   |  27%
  |                                                                            
  |===================                                                   |  28%
  |                                                                            
  |====================                                                  |  28%
  |                                                                            
  |====================                                                  |  29%
  |                                                                            
  |=====================                                                 |  30%
  |                                                                            
  |======================                                                |  31%
  |                                                                            
  |======================                                                |  32%
  |                                                                            
  |=======================                                               |  33%
  |                                                                            
  |========================                                              |  34%
  |                                                                            
  |========================                                              |  35%
  |                                                                            
  |=========================                                             |  36%
  |                                                                            
  |==========================                                            |  37%
  |                                                                            
  |==========================                                            |  38%
  |                                                                            
  |===========================                                           |  38%
  |                                                                            
  |===========================                                           |  39%
  |                                                                            
  |============================                                          |  40%
  |                                                                            
  |=============================                                         |  41%
  |                                                                            
  |=============================                                         |  42%
  |                                                                            
  |==============================                                        |  42%
  |                                                                            
  |==============================                                        |  43%
  |                                                                            
  |===============================                                       |  44%
  |                                                                            
  |===============================                                       |  45%
  |                                                                            
  |================================                                      |  45%
  |                                                                            
  |================================                                      |  46%
  |                                                                            
  |=================================                                     |  47%
  |                                                                            
  |=================================                                     |  48%
  |                                                                            
  |==================================                                    |  48%
  |                                                                            
  |==================================                                    |  49%
  |                                                                            
  |===================================                                   |  50%
  |                                                                            
  |====================================                                  |  51%
  |                                                                            
  |====================================                                  |  52%
  |                                                                            
  |=====================================                                 |  52%
  |                                                                            
  |=====================================                                 |  53%
  |                                                                            
  |======================================                                |  54%
  |                                                                            
  |======================================                                |  55%
  |                                                                            
  |=======================================                               |  55%
  |                                                                            
  |=======================================                               |  56%
  |                                                                            
  |========================================                              |  57%
  |                                                                            
  |========================================                              |  58%
  |                                                                            
  |=========================================                             |  58%
  |                                                                            
  |=========================================                             |  59%
  |                                                                            
  |==========================================                            |  60%
  |                                                                            
  |===========================================                           |  61%
  |                                                                            
  |===========================================                           |  62%
  |                                                                            
  |============================================                          |  63%
  |                                                                            
  |=============================================                         |  64%
  |                                                                            
  |=============================================                         |  65%
  |                                                                            
  |==============================================                        |  65%
  |                                                                            
  |==============================================                        |  66%
  |                                                                            
  |===============================================                       |  67%
  |                                                                            
  |===============================================                       |  68%
  |                                                                            
  |================================================                      |  68%
  |                                                                            
  |================================================                      |  69%
  |                                                                            
  |=================================================                     |  70%
  |                                                                            
  |==================================================                    |  71%
  |                                                                            
  |==================================================                    |  72%
  |                                                                            
  |===================================================                   |  72%
  |                                                                            
  |===================================================                   |  73%
  |                                                                            
  |====================================================                  |  74%
  |                                                                            
  |====================================================                  |  75%
  |                                                                            
  |=====================================================                 |  75%
  |                                                                            
  |=====================================================                 |  76%
  |                                                                            
  |======================================================                |  77%
  |                                                                            
  |======================================================                |  78%
  |                                                                            
  |=======================================================               |  78%
  |                                                                            
  |=======================================================               |  79%
  |                                                                            
  |========================================================              |  80%
  |                                                                            
  |=========================================================             |  81%
  |                                                                            
  |=========================================================             |  82%
  |                                                                            
  |==========================================================            |  83%
  |                                                                            
  |===========================================================           |  84%
  |                                                                            
  |===========================================================           |  85%
  |                                                                            
  |============================================================          |  85%
  |                                                                            
  |============================================================          |  86%
  |                                                                            
  |=============================================================         |  87%
  |                                                                            
  |=============================================================         |  88%
  |                                                                            
  |==============================================================        |  88%
  |                                                                            
  |==============================================================        |  89%
  |                                                                            
  |===============================================================       |  90%
  |                                                                            
  |===============================================================       |  91%
  |                                                                            
  |================================================================      |  91%
  |                                                                            
  |================================================================      |  92%
  |                                                                            
  |=================================================================     |  93%
  |                                                                            
  |==================================================================    |  94%
  |                                                                            
  |==================================================================    |  95%
  |                                                                            
  |===================================================================   |  95%
  |                                                                            
  |===================================================================   |  96%
  |                                                                            
  |====================================================================  |  97%
  |                                                                            
  |====================================================================  |  98%
  |                                                                            
  |===================================================================== |  98%
  |                                                                            
  |===================================================================== |  99%
  |                                                                            
  |======================================================================| 100%
# Break the combined NAME label ("Census Tract ..., ... County, State") into
# three columns. The split is on "," alone, so `county` and `state` keep the
# space that followed each comma.
census_1_s <- census_1 %>%
  separate(col = NAME, into = c("tract", "county", "state"), sep = ",")

head(census_1_s)
## Simple feature collection with 6 features and 12 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -121.8832 ymin: 37.2602 xmax: -121.7828 ymax: 37.34074
## Geodetic CRS:  NAD83
##         GEOID                tract              county       state hhincomeE
## 1 06085512042 Census Tract 5120.42  Santa Clara County  California     78382
## 2 06085503306 Census Tract 5033.06  Santa Clara County  California     87361
## 3 06085503108 Census Tract 5031.08  Santa Clara County  California    111618
## 4 06085503323 Census Tract 5033.23  Santa Clara County  California    135877
## 5 06085503401 Census Tract 5034.01  Santa Clara County  California     79318
## 6 06085512020 Census Tract 5120.20  Santa Clara County  California    102401
##   hhincomeM race.totE race.totM race.whiteE race.whiteM race.blackE race.blackM
## 1      3798      3020       292         878         173         151          79
## 2     14909      4373       379        1349         443          86          71
## 3     23929      8222       627        3301         619         637         211
## 4     14938      4763       402        1308         458         251         155
## 5     17404      4468       431        1202         366          75          65
## 6     10813      7825       879        4039         902         167         106
##                         geometry
## 1 MULTIPOLYGON (((-121.82 37....
## 2 MULTIPOLYGON (((-121.8318 3...
## 3 MULTIPOLYGON (((-121.8832 3...
## 4 MULTIPOLYGON (((-121.8006 3...
## 5 MULTIPOLYGON (((-121.8455 3...
## 6 MULTIPOLYGON (((-121.8419 3...
# Turn the Yelp table into point features; the longitude/latitude columns are
# declared as EPSG:4326.
yelp_sf <- yelp_dropna1 %>%
  st_as_sf(
    coords = c("coordinates.longitude", "coordinates.latitude"),
    crs = 4326
  )

# Dissolve the Santa Clara County tracts into a single boundary expressed in
# the same CRS as the points. The county label is compared after trimws()
# because the comma-only split of NAME leaves a leading space on it; matching
# on the trimmed value works whether or not that space is present.
county_boundary <- census_1_s %>%
  st_transform(4326) %>%
  filter(trimws(county) == "Santa Clara County") %>%
  st_union()

# sf subsets: keep only the businesses that intersect the county boundary
yelp_in <- yelp_sf[county_boundary, , op = st_intersects]
nrow(yelp_in)
## [1] 5291
# Report how the row and column counts changed between the raw Yelp table
# (my_yelp) and the points retained inside the county (yelp_in).
print(
  glue::glue("nrow before: {nrow(my_yelp)} -> nrow after: {nrow(yelp_in)} \n
            ncol before: {ncol(my_yelp)} -> ncol after: {ncol(yelp_in)} \n")
)
## nrow before: 39702 -> nrow after: 5291 
## 
## ncol before: 17 -> ncol after: 24
# Visualize: switch tmap to its interactive viewer before drawing
tmap_mode(mode = "view")
## tmap mode set to interactive viewing
# One dot per retained business, coloured by its price value
tm_shape(shp = yelp_in) +
  tm_dots(col = "price")
# census_1 is already an sf object (get_acs() was called with geometry = TRUE)
# and its coordinates are in NAD83. Reproject to EPSG:4326 so the tracts share
# a CRS with the Yelp points. Assigning with `st_crs(x) <- 4326` would only
# relabel the coordinates without transforming them.
census_sf <- census_1 %>%
  st_transform(4326)
# Spatial join
# Tract-side join: keeps the tract polygons as geometry and attaches the
# attributes of each intersecting business, so a tract is repeated once per
# matching business (rows 2, 2.1, 2.2, ... in the output below).
census_yelp <- census_sf %>%
  st_join(yelp_in, join = st_intersects)

# Point-side join: keeps the business points as geometry and attaches the
# attributes of the tract each one intersects.
yelp_census <- yelp_in %>%
  st_join(census_sf, join = st_intersects)

# View
head(census_yelp)
## Simple feature collection with 6 features and 33 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -121.8318 ymin: 37.2602 xmax: -121.8083 ymax: 37.34074
## Geodetic CRS:  WGS 84
##           GEOID                                                 NAME hhincomeE
## 1   06085512042 Census Tract 5120.42, Santa Clara County, California     78382
## 2   06085503306 Census Tract 5033.06, Santa Clara County, California     87361
## 2.1 06085503306 Census Tract 5033.06, Santa Clara County, California     87361
## 2.2 06085503306 Census Tract 5033.06, Santa Clara County, California     87361
## 2.3 06085503306 Census Tract 5033.06, Santa Clara County, California     87361
## 2.4 06085503306 Census Tract 5033.06, Santa Clara County, California     87361
##     hhincomeM race.totE race.totM race.whiteE race.whiteM race.blackE
## 1        3798      3020       292         878         173         151
## 2       14909      4373       379        1349         443          86
## 2.1     14909      4373       379        1349         443          86
## 2.2     14909      4373       379        1349         443          86
## 2.3     14909      4373       379        1349         443          86
## 2.4     14909      4373       379        1349         443          86
##     race.blackM                     id
## 1            79 Iw0u4il3SpX7rtY0ookQdA
## 2            71 jcxoXMWrqreHQT0P5h1s9g
## 2.1          71 lZUd_rDdO5FyZQZfKjGqZw
## 2.2          71 4XaESbCqIsmETYxpPiv9Lg
## 2.3          71 NlYprq0SB1tNdgWv1GpXdA
## 2.4          71 smFno58h21Rd4RihK87xvw
##                                                     alias
## 1                    puro-michoacan-restaurant-san-jose-2
## 2                                     welch-park-san-jose
## 2.1                          cha-ca-long-phung-san-jose-2
## 2.2                      mexican-style-churros-san-jose-2
## 2.3 que-ta-banh-canh-trang-bang-udon-noodle-soup-san-jose
## 2.4                              taste-of-persia-san-jose
##                                               name
## 1                        Puro Michoacan Restaurant
## 2                                       Welch Park
## 2.1                              Cha Ca Long Phung
## 2.2                          Mexican Style Churros
## 2.3 Que Ta Banh Canh Trang Bang - Udon Noodle Soup
## 2.4                                Taste of Persia
##                                                                image_url
## 1   https://s3-media2.fl.yelpcdn.com/bphoto/tPx6AVzj0Xh7SnKt1jalEQ/o.jpg
## 2   https://s3-media4.fl.yelpcdn.com/bphoto/g5mj_1kJMixlfUYA2AgfWg/o.jpg
## 2.1 https://s3-media1.fl.yelpcdn.com/bphoto/HF_-0eyrZQ9_-lKVjNpUyg/o.jpg
## 2.2 https://s3-media3.fl.yelpcdn.com/bphoto/L4IDuy_D2wpBwiSC0P6qzw/o.jpg
## 2.3 https://s3-media1.fl.yelpcdn.com/bphoto/ykTpTBey8xmauZpnv3s22A/o.jpg
## 2.4 https://s3-media2.fl.yelpcdn.com/bphoto/6g5XM0Pgi3qIYv9PnkAESw/o.jpg
##     is_closed
## 1       FALSE
## 2       FALSE
## 2.1     FALSE
## 2.2     FALSE
## 2.3     FALSE
## 2.4     FALSE
##                                                                                                                                                                                                                    url
## 1                    https://www.yelp.com/biz/puro-michoacan-restaurant-san-jose-2?adjust_creative=9FUT8HLBJS3nIJ7hDk8tZw&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=9FUT8HLBJS3nIJ7hDk8tZw
## 2                                     https://www.yelp.com/biz/welch-park-san-jose?adjust_creative=9FUT8HLBJS3nIJ7hDk8tZw&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=9FUT8HLBJS3nIJ7hDk8tZw
## 2.1                          https://www.yelp.com/biz/cha-ca-long-phung-san-jose-2?adjust_creative=9FUT8HLBJS3nIJ7hDk8tZw&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=9FUT8HLBJS3nIJ7hDk8tZw
## 2.2                      https://www.yelp.com/biz/mexican-style-churros-san-jose-2?adjust_creative=9FUT8HLBJS3nIJ7hDk8tZw&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=9FUT8HLBJS3nIJ7hDk8tZw
## 2.3 https://www.yelp.com/biz/que-ta-banh-canh-trang-bang-udon-noodle-soup-san-jose?adjust_creative=9FUT8HLBJS3nIJ7hDk8tZw&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=9FUT8HLBJS3nIJ7hDk8tZw
## 2.4                              https://www.yelp.com/biz/taste-of-persia-san-jose?adjust_creative=9FUT8HLBJS3nIJ7hDk8tZw&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=9FUT8HLBJS3nIJ7hDk8tZw
##     review_count                     categories rating     transactions
## 1             38                        Mexican    4.0                 
## 2              7                          Parks    3.0                 
## 2.1          200                     Vietnamese    4.0                 
## 2.2           52              Desserts, Mexican    5.0                 
## 2.3          192   Vietnamese, Seafood, Noodles    3.5 pickup, delivery
## 2.4           82 Mediterranean, Persian/Iranian    4.0 pickup, delivery
##            phone  display_phone distance price keys   location.address1
## 1   +16692347944 (669) 234-7944 262.5733    $$    a   5138 Monterey Hwy
## 2                               689.0162  <NA>    p          Kenesta Wy
## 2.1 +14082549941 (408) 254-9941 533.5555     $    a       2145 Tully Rd
## 2.2 +14085129594 (408) 512-9594 757.5121     $    a 1812 Cunningham Ave
## 2.3 +14082591445 (408) 259-1445 603.5719    $$    a       2005 Tully Rd
## 2.4 +14084935978 (408) 493-5978 597.6317    $$    a       2011 Tully Rd
##     location.address2 location.address3 location.city location.zip_code
## 1               Ste A              <NA>      San Jose             95111
## 2                                            San Jose             95122
## 2.1                                          San Jose             95122
## 2.2                                <NA>      San Jose             95122
## 2.3              <NA>                        San Jose             95122
## 2.4                                <NA>      San Jose             95122
##     location.country location.state
## 1                 US             CA
## 2                 US             CA
## 2.1               US             CA
## 2.2               US             CA
## 2.3               US             CA
## 2.4               US             CA
##                         location.display_address                       geometry
## 1   5138 Monterey Hwy, Ste A, San Jose, CA 95111 MULTIPOLYGON (((-121.82 37....
## 2                 Kenesta Wy, San Jose, CA 95122 MULTIPOLYGON (((-121.8318 3...
## 2.1            2145 Tully Rd, San Jose, CA 95122 MULTIPOLYGON (((-121.8318 3...
## 2.2      1812 Cunningham Ave, San Jose, CA 95122 MULTIPOLYGON (((-121.8318 3...
## 2.3            2005 Tully Rd, San Jose, CA 95122 MULTIPOLYGON (((-121.8318 3...
## 2.4            2011 Tully Rd, San Jose, CA 95122 MULTIPOLYGON (((-121.8318 3...
# Preview the first rows of the point-side join
head(yelp_census)
## Simple feature collection with 6 features and 33 fields
## Geometry type: POINT
## Dimension:     XY
## Bounding box:  xmin: -121.8339 ymin: 37.25949 xmax: -121.8057 ymax: 37.33745
## Geodetic CRS:  WGS 84
##                       id                         alias                 name
## 1 PkOM7wJZzZ0DoxW84_uLDg edenvale-garden-park-san-jose Edenvale Garden Park
## 2 j-gBxd5Nkhr9iRHi_NJp7w      great-oaks-park-san-jose      Great Oaks Park
## 3 C41iNUrHTWYN9rUJSn_rJw       chynoweth-park-san-jose       Chynoweth Park
## 4 wvUCOyYuNcsc7g3OsMxWiA      danna-rock-park-san-jose      Danna Rock Park
## 5 sJ0RCNgqZ4nlLMl4LhKDmQ lake-cunningham-park-san-jose Lake Cunningham Park
## 6 R6Eb_p72vynnpq20aOLATg        nisich-park-san-jose-2          Nisich Park
##                                                              image_url
## 1 https://s3-media4.fl.yelpcdn.com/bphoto/yFTWpEtAU5xib85UvVq1pQ/o.jpg
## 2 https://s3-media4.fl.yelpcdn.com/bphoto/rcIYKSNUQ84vatIfw6_h7A/o.jpg
## 3 https://s3-media1.fl.yelpcdn.com/bphoto/gx6Ei11wytx_V215HF1gzg/o.jpg
## 4 https://s3-media3.fl.yelpcdn.com/bphoto/F0AQ22xR2eMrDiImNn66fA/o.jpg
## 5 https://s3-media3.fl.yelpcdn.com/bphoto/ZzHo_dwM5ar6CKV6TsxshQ/o.jpg
## 6 https://s3-media3.fl.yelpcdn.com/bphoto/ECTqvmIlk5LyKxKM7A-Row/o.jpg
##   is_closed
## 1     FALSE
## 2     FALSE
## 3     FALSE
## 4     FALSE
## 5     FALSE
## 6     FALSE
##                                                                                                                                                                                          url
## 1 https://www.yelp.com/biz/edenvale-garden-park-san-jose?adjust_creative=9FUT8HLBJS3nIJ7hDk8tZw&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=9FUT8HLBJS3nIJ7hDk8tZw
## 2      https://www.yelp.com/biz/great-oaks-park-san-jose?adjust_creative=9FUT8HLBJS3nIJ7hDk8tZw&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=9FUT8HLBJS3nIJ7hDk8tZw
## 3       https://www.yelp.com/biz/chynoweth-park-san-jose?adjust_creative=9FUT8HLBJS3nIJ7hDk8tZw&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=9FUT8HLBJS3nIJ7hDk8tZw
## 4      https://www.yelp.com/biz/danna-rock-park-san-jose?adjust_creative=9FUT8HLBJS3nIJ7hDk8tZw&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=9FUT8HLBJS3nIJ7hDk8tZw
## 5 https://www.yelp.com/biz/lake-cunningham-park-san-jose?adjust_creative=9FUT8HLBJS3nIJ7hDk8tZw&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=9FUT8HLBJS3nIJ7hDk8tZw
## 6        https://www.yelp.com/biz/nisich-park-san-jose-2?adjust_creative=9FUT8HLBJS3nIJ7hDk8tZw&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=9FUT8HLBJS3nIJ7hDk8tZw
##   review_count         categories rating transactions        phone
## 1           39              Parks    4.0                          
## 2            6              Parks    3.5              +14087935510
## 3            4 Parks, Playgrounds    3.0              +14087935510
## 4            3              Parks    2.5                          
## 5           95              Parks    3.5              +14087935510
## 6            4              Parks    4.5                          
##    display_phone  distance price keys            location.address1
## 1                 526.0228  <NA>    p             200 Edenvale Ave
## 2 (408) 793-5510  909.4110  <NA>    p                 5248 Snow Dr
## 3 (408) 793-5510  682.7830  <NA>    p Chynoweth Ave & Edenvale Ave
## 4                1082.4227  <NA>    p    4524-4534 Houndshaven Way
## 5 (408) 793-5510 1346.1879  <NA>    p              2305 S White Rd
## 6                1803.6135  <NA>    p           1401-1437 Suzay Ct
##   location.address2 location.address3 location.city location.zip_code
## 1                                          San Jose             95136
## 2                                          San Jose             95111
## 3                                          San Jose             95136
## 4                                          San Jose             95111
## 5                                          San Jose             95101
## 6                                          San Jose             95122
##   location.country location.state
## 1               US             CA
## 2               US             CA
## 3               US             CA
## 4               US             CA
## 5               US             CA
## 6               US             CA
##                           location.display_address       GEOID
## 1             200 Edenvale Ave, San Jose, CA 95136 06085512021
## 2                 5248 Snow Dr, San Jose, CA 95111 06085512017
## 3 Chynoweth Ave & Edenvale Ave, San Jose, CA 95136 06085512021
## 4    4524-4534 Houndshaven Way, San Jose, CA 95111 06085512017
## 5              2305 S White Rd, San Jose, CA 95101 06085503321
## 6           1401-1437 Suzay Ct, San Jose, CA 95122 06085503111
##                                                   NAME hhincomeE hhincomeM
## 1 Census Tract 5120.21, Santa Clara County, California    130460      7700
## 2 Census Tract 5120.17, Santa Clara County, California     92855     17920
## 3 Census Tract 5120.21, Santa Clara County, California    130460      7700
## 4 Census Tract 5120.17, Santa Clara County, California     92855     17920
## 5 Census Tract 5033.21, Santa Clara County, California    156667     28278
## 6 Census Tract 5031.11, Santa Clara County, California     99423     18943
##   race.totE race.totM race.whiteE race.whiteM race.blackE race.blackM
## 1      6491       502        3331         469         275         225
## 2      7565       781        3316         634         196         175
## 3      6491       502        3331         469         275         225
## 4      7565       781        3316         634         196         175
## 5      4690       355         419         182          44          49
## 6      5132       483        1167         444          17          27
##                     geometry
## 1 POINT (-121.8203 37.26251)
## 2 POINT (-121.8057 37.26807)
## 3 POINT (-121.8188 37.25949)
## 4 POINT (-121.8234 37.27216)
## 5 POINT (-121.8089 37.33745)
## 6 POINT (-121.8339 37.31825)
# Map 1: polygons shaded by the mean Yelp rating within each GEOID,
# classified into quantile breaks.
tm_shape(
  census_yelp %>%
    group_by(GEOID) %>%
    summarise(rating = mean(rating))
) +
  tm_polygons(col = "rating", style = "quantile")

# Map 2: Yelp points drawn as dots, coloured by the hhincomeE column.
tm_shape(yelp_census) +
  tm_dots(col = "hhincomeE")
# Recode review_count into a two-level label ("many" when above 1000,
# otherwise "few") stored in a new column, then show only the original and
# recoded columns for the first few rows.
yelp_in %>%
  mutate(
    review_count_binary = if_else(review_count > 1000, "many", "few")
  ) %>%
  select(review_count, review_count_binary) %>%
  head()
## Simple feature collection with 6 features and 2 fields
## Geometry type: POINT
## Dimension:     XY
## Bounding box:  xmin: -121.8339 ymin: 37.25949 xmax: -121.8057 ymax: 37.33745
## Geodetic CRS:  WGS 84
##   review_count review_count_binary                   geometry
## 1           39                 few POINT (-121.8203 37.26251)
## 2            6                 few POINT (-121.8057 37.26807)
## 3            4                 few POINT (-121.8188 37.25949)
## 4            3                 few POINT (-121.8234 37.27216)
## 5           95                 few POINT (-121.8089 37.33745)
## 6            4                 few POINT (-121.8339 37.31825)
# Standardise every numeric column with scale() and keep only those columns.
# Predicate functions such as is.numeric must be wrapped in where() when used
# for column selection; passing the bare predicate is deprecated and triggers
# a "Predicate functions must be wrapped in `where()`" warning.
yelp_in %>%
  mutate(across(where(is.numeric), scale)) %>%
  select(where(is.numeric))
## Warning: Predicate functions must be wrapped in `where()`.
## 
##   # Bad
##   data %>% select(is.numeric)
## 
##   # Good
##   data %>% select(where(is.numeric))
## 
## ℹ Please update your code.
## This message is displayed once per session.
## Simple feature collection with 5291 features and 3 fields
## Geometry type: POINT
## Dimension:     XY
## Bounding box:  xmin: -122.1847 ymin: 36.95046 xmax: -121.52 ymax: 37.45962
## Geodetic CRS:  WGS 84
## First 10 features:
##    review_count     rating    distance                   geometry
## 1    -0.5120726  0.4815183 -0.50796146 POINT (-121.8203 37.26251)
## 2    -0.5844259 -0.1124927 -0.32989489 POINT (-121.8057 37.26807)
## 3    -0.5888110 -0.7065038 -0.43515339 POINT (-121.8188 37.25949)
## 4    -0.5910035 -1.3005149 -0.24953872 POINT (-121.8234 37.27216)
## 5    -0.3892912 -0.1124927 -0.12703162 POINT (-121.8089 37.33745)
## 6    -0.5888110  1.0755294  0.08542202 POINT (-121.8339 37.31825)
## 7    -0.5822334 -0.7065038 -0.43225838  POINT (-121.8239 37.3255)
## 8    -0.5931960  0.4815183 -0.35756898 POINT (-121.8254 37.33876)
## 9    -0.5493455  0.4815183 -0.25870205 POINT (-121.8614 37.28697)
## 10   -0.5274203  0.4815183 -0.03095143 POINT (-121.8896 37.28915)

The patterns in the data changed considerably after cleaning. The difference is clearest in the second map, where most of the POIs lie along the transportation network. The clusters are densest in the center of the city and thin out toward the outer peripheral areas. The review count is highest for businesses rated 4.0 and second highest for those rated 3.5. Spatially, price and rating appear to be correlated: higher-priced restaurants tend to be located in areas with good ratings, and vice versa.