library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.2.1
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6     ✔ purrr   0.3.4
## ✔ tibble  3.1.8     ✔ dplyr   1.0.9
## ✔ tidyr   1.2.0     ✔ stringr 1.4.0
## ✔ readr   2.1.2     ✔ forcats 0.5.2
## Warning: package 'ggplot2' was built under R version 4.2.1
## Warning: package 'tibble' was built under R version 4.2.1
## Warning: package 'readr' was built under R version 4.2.1
## Warning: package 'dplyr' was built under R version 4.2.1
## Warning: package 'forcats' was built under R version 4.2.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(tidyr)
library(here)
## Warning: package 'here' was built under R version 4.2.1
## here() starts at D:/Georgia Tech/Spec topic_
library(tidycensus)
## Warning: package 'tidycensus' was built under R version 4.2.1
library(sf)
## Warning: package 'sf' was built under R version 4.2.1
## Linking to GEOS 3.9.1, GDAL 3.4.3, PROJ 7.2.1; sf_use_s2() is TRUE
library(tmap)
## Warning: package 'tmap' was built under R version 4.2.1
library(jsonlite)
## Warning: package 'jsonlite' was built under R version 4.2.1
## 
## Attaching package: 'jsonlite'
## The following object is masked from 'package:purrr':
## 
##     flatten
library(tidyverse)
library(httr)
## Warning: package 'httr' was built under R version 4.2.1
library(jsonlite)
library(reshape2)
## Warning: package 'reshape2' was built under R version 4.2.1
## 
## Attaching package: 'reshape2'
## The following object is masked from 'package:tidyr':
## 
##     smiths
library(here)
library(yelpr)
library(knitr)
## Warning: package 'knitr' was built under R version 4.2.1
# Register the Census API key for the current R session. The key is read from
# the `google_api` environment variable.
# NOTE(review): the variable name suggests a Google key -- confirm that it
# really holds the Census API key.
tidycensus::census_api_key(Sys.getenv("google_api"))
## To install your API key for use in future sessions, run this function with `install = TRUE`.
# To persist the key across sessions, pass `install = TRUE` as an argument to
# census_api_key(). A standalone `install = TRUE` statement only creates a
# variable called `install` and has no effect on the key, so it was removed.
# Read the Yelp data we downloaded last week. here() already resolves paths
# from the project root, so only the file name is given; an absolute path
# inside here() would tie the script to a single machine.
# NOTE(review): this is the same file that is read again later as the "full
# data" -- confirm whether a smaller subset file was intended here.
yelp_subset <- read_rds(here("yelp_all_4.rds"))
# Print with a very wide width so that every column is shown
yelp_subset %>%
  tibble() %>%
  print(width = 1000)
## # A tibble: 39,702 × 17
##    id                     alias                         name                
##    <chr>                  <chr>                         <chr>               
##  1 PkOM7wJZzZ0DoxW84_uLDg edenvale-garden-park-san-jose Edenvale Garden Park
##  2 j-gBxd5Nkhr9iRHi_NJp7w great-oaks-park-san-jose      Great Oaks Park     
##  3 C41iNUrHTWYN9rUJSn_rJw chynoweth-park-san-jose       Chynoweth Park      
##  4 wvUCOyYuNcsc7g3OsMxWiA danna-rock-park-san-jose      Danna Rock Park     
##  5 sJ0RCNgqZ4nlLMl4LhKDmQ lake-cunningham-park-san-jose Lake Cunningham Park
##  6 R6Eb_p72vynnpq20aOLATg nisich-park-san-jose-2        Nisich Park         
##  7 jcxoXMWrqreHQT0P5h1s9g welch-park-san-jose           Welch Park          
##  8 Y-tuulUyvCfDzzH4yV-WgQ hillview-park-san-jose        Hillview Park       
##  9 BCJx7oQ8sN55mcqMyWFnmg vieira-park-san-jose          Vieira Park         
## 10 DGSPySxKJJJfO6stJEF7Uw lincoln-glen-park-san-jose    Lincoln Glen Park   
##    image_url                                                           
##    <chr>                                                               
##  1 https://s3-media4.fl.yelpcdn.com/bphoto/yFTWpEtAU5xib85UvVq1pQ/o.jpg
##  2 https://s3-media4.fl.yelpcdn.com/bphoto/rcIYKSNUQ84vatIfw6_h7A/o.jpg
##  3 https://s3-media1.fl.yelpcdn.com/bphoto/gx6Ei11wytx_V215HF1gzg/o.jpg
##  4 https://s3-media3.fl.yelpcdn.com/bphoto/F0AQ22xR2eMrDiImNn66fA/o.jpg
##  5 https://s3-media3.fl.yelpcdn.com/bphoto/ZzHo_dwM5ar6CKV6TsxshQ/o.jpg
##  6 https://s3-media3.fl.yelpcdn.com/bphoto/ECTqvmIlk5LyKxKM7A-Row/o.jpg
##  7 https://s3-media4.fl.yelpcdn.com/bphoto/g5mj_1kJMixlfUYA2AgfWg/o.jpg
##  8 https://s3-media1.fl.yelpcdn.com/bphoto/p7Gg879Fu8GzS66cPeu9ig/o.jpg
##  9 https://s3-media2.fl.yelpcdn.com/bphoto/6s2pA1-_oklqy66z0EaKdQ/o.jpg
## 10 https://s3-media4.fl.yelpcdn.com/bphoto/qfk9XMXuvS_0GOHknXZL6w/o.jpg
##    is_closed
##    <lgl>    
##  1 FALSE    
##  2 FALSE    
##  3 FALSE    
##  4 FALSE    
##  5 FALSE    
##  6 FALSE    
##  7 FALSE    
##  8 FALSE    
##  9 FALSE    
## 10 FALSE    
##    url                                                                          
##    <chr>                                                                        
##  1 https://www.yelp.com/biz/edenvale-garden-park-san-jose?adjust_creative=9FUT8…
##  2 https://www.yelp.com/biz/great-oaks-park-san-jose?adjust_creative=9FUT8HLBJS…
##  3 https://www.yelp.com/biz/chynoweth-park-san-jose?adjust_creative=9FUT8HLBJS3…
##  4 https://www.yelp.com/biz/danna-rock-park-san-jose?adjust_creative=9FUT8HLBJS…
##  5 https://www.yelp.com/biz/lake-cunningham-park-san-jose?adjust_creative=9FUT8…
##  6 https://www.yelp.com/biz/nisich-park-san-jose-2?adjust_creative=9FUT8HLBJS3n…
##  7 https://www.yelp.com/biz/welch-park-san-jose?adjust_creative=9FUT8HLBJS3nIJ7…
##  8 https://www.yelp.com/biz/hillview-park-san-jose?adjust_creative=9FUT8HLBJS3n…
##  9 https://www.yelp.com/biz/vieira-park-san-jose?adjust_creative=9FUT8HLBJS3nIJ…
## 10 https://www.yelp.com/biz/lincoln-glen-park-san-jose?adjust_creative=9FUT8HLB…
##    review_count categories   rating coordinates$latitude $longitude transactions
##           <int> <list>        <dbl>                <dbl>      <dbl> <list>      
##  1           39 <df [1 × 2]>    4                   37.3      -122. <list [0]>  
##  2            6 <df [1 × 2]>    3.5                 37.3      -122. <list [0]>  
##  3            4 <df [2 × 2]>    3                   37.3      -122. <list [0]>  
##  4            3 <df [1 × 2]>    2.5                 37.3      -122. <list [0]>  
##  5           95 <df [1 × 2]>    3.5                 37.3      -122. <list [0]>  
##  6            4 <df [1 × 2]>    4.5                 37.3      -122. <list [0]>  
##  7            7 <df [1 × 2]>    3                   37.3      -122. <list [0]>  
##  8            2 <df [2 × 2]>    4                   37.3      -122. <list [0]>  
##  9           22 <df [1 × 2]>    4                   37.3      -122. <list [0]>  
## 10           32 <df [1 × 2]>    4                   37.3      -122. <list [0]>  
##    location$address1            $address2 $address3 $city    $zip_code $country
##    <chr>                        <chr>     <chr>     <chr>    <chr>     <chr>   
##  1 200 Edenvale Ave             ""        ""        San Jose 95136     US      
##  2 5248 Snow Dr                 ""        ""        San Jose 95111     US      
##  3 Chynoweth Ave & Edenvale Ave ""        ""        San Jose 95136     US      
##  4 4524-4534 Houndshaven Way    ""        ""        San Jose 95111     US      
##  5 2305 S White Rd              ""        ""        San Jose 95101     US      
##  6 1401-1437 Suzay Ct           ""        ""        San Jose 95122     US      
##  7 Kenesta Wy                   ""        ""        San Jose 95122     US      
##  8 Adrian Way & Ocala Ave       ""        ""        San Jose 95122     US      
##  9 700 Adeline Ave              ""        ""        San Jose 95136     US      
## 10 Radio Ave & Curtner Ave      ""        ""        San Jose 95125     US      
##    $state $display_address phone          display_phone    distance price keys 
##    <chr>  <list>           <chr>          <chr>               <dbl> <chr> <chr>
##  1 CA     <chr [2]>        ""             ""                   526. <NA>  p    
##  2 CA     <chr [2]>        "+14087935510" "(408) 793-5510"     909. <NA>  p    
##  3 CA     <chr [2]>        "+14087935510" "(408) 793-5510"     683. <NA>  p    
##  4 CA     <chr [2]>        ""             ""                  1082. <NA>  p    
##  5 CA     <chr [2]>        "+14087935510" "(408) 793-5510"    1346. <NA>  p    
##  6 CA     <chr [2]>        ""             ""                  1804. <NA>  p    
##  7 CA     <chr [2]>        ""             ""                   689. <NA>  p    
##  8 CA     <chr [2]>        "+14087935510" "(408) 793-5510"     850. <NA>  p    
##  9 CA     <chr [2]>        ""             ""                  1063. <NA>  p    
## 10 CA     <chr [2]>        "+14085353570" "(408) 535-3570"    1553. <NA>  p    
## # … with 39,692 more rows
# `coordinates` is a nested data frame column; show its first rows
head(yelp_subset$coordinates)
##   latitude longitude
## 1 37.26251 -121.8203
## 2 37.26807 -121.8057
## 3 37.25949 -121.8188
## 4 37.27216 -121.8234
## 5 37.33745 -121.8089
## 6 37.31825 -121.8339
# Flatten the nested data frame columns into regular top-level columns and
# convert the result to a tibble.
yelp_flat <- as_tibble(jsonlite::flatten(yelp_subset))

# The nested `coordinates` column no longer exists after flattening, so this
# lookup warns and yields NULL.
head(yelp_flat$coordinates)
## Warning: Unknown or uninitialised column: `coordinates`.
## NULL
# Collapse every element of the two list-columns into a single
# comma-separated string, so both become plain character columns.
yelp_concat <- yelp_flat %>%
  mutate(across(
    c(transactions, location.display_address),
    ~ map_chr(.x, function(items) str_c(items, collapse = ", "))
  ))
# Helper for the "categories" list-column of the Yelp data.
#   x: one element of that column, i.e. a data frame with the columns
#      "alias" and "title".
# Returns one string holding all values of "title" joined by ", ".
concate_list <- function(x) {
  str_c(x[["title"]], collapse = ", ")
}

# Replace each per-business category data frame with its joined titles
yelp_flat2 <- mutate(
  yelp_concat,
  categories = map_chr(categories, concate_list)
)

# Show all columns of the fully flattened table
print(yelp_flat2, width = 1000)
## # A tibble: 39,702 × 25
##    id                     alias                         name                
##    <chr>                  <chr>                         <chr>               
##  1 PkOM7wJZzZ0DoxW84_uLDg edenvale-garden-park-san-jose Edenvale Garden Park
##  2 j-gBxd5Nkhr9iRHi_NJp7w great-oaks-park-san-jose      Great Oaks Park     
##  3 C41iNUrHTWYN9rUJSn_rJw chynoweth-park-san-jose       Chynoweth Park      
##  4 wvUCOyYuNcsc7g3OsMxWiA danna-rock-park-san-jose      Danna Rock Park     
##  5 sJ0RCNgqZ4nlLMl4LhKDmQ lake-cunningham-park-san-jose Lake Cunningham Park
##  6 R6Eb_p72vynnpq20aOLATg nisich-park-san-jose-2        Nisich Park         
##  7 jcxoXMWrqreHQT0P5h1s9g welch-park-san-jose           Welch Park          
##  8 Y-tuulUyvCfDzzH4yV-WgQ hillview-park-san-jose        Hillview Park       
##  9 BCJx7oQ8sN55mcqMyWFnmg vieira-park-san-jose          Vieira Park         
## 10 DGSPySxKJJJfO6stJEF7Uw lincoln-glen-park-san-jose    Lincoln Glen Park   
##    image_url                                                           
##    <chr>                                                               
##  1 https://s3-media4.fl.yelpcdn.com/bphoto/yFTWpEtAU5xib85UvVq1pQ/o.jpg
##  2 https://s3-media4.fl.yelpcdn.com/bphoto/rcIYKSNUQ84vatIfw6_h7A/o.jpg
##  3 https://s3-media1.fl.yelpcdn.com/bphoto/gx6Ei11wytx_V215HF1gzg/o.jpg
##  4 https://s3-media3.fl.yelpcdn.com/bphoto/F0AQ22xR2eMrDiImNn66fA/o.jpg
##  5 https://s3-media3.fl.yelpcdn.com/bphoto/ZzHo_dwM5ar6CKV6TsxshQ/o.jpg
##  6 https://s3-media3.fl.yelpcdn.com/bphoto/ECTqvmIlk5LyKxKM7A-Row/o.jpg
##  7 https://s3-media4.fl.yelpcdn.com/bphoto/g5mj_1kJMixlfUYA2AgfWg/o.jpg
##  8 https://s3-media1.fl.yelpcdn.com/bphoto/p7Gg879Fu8GzS66cPeu9ig/o.jpg
##  9 https://s3-media2.fl.yelpcdn.com/bphoto/6s2pA1-_oklqy66z0EaKdQ/o.jpg
## 10 https://s3-media4.fl.yelpcdn.com/bphoto/qfk9XMXuvS_0GOHknXZL6w/o.jpg
##    is_closed
##    <lgl>    
##  1 FALSE    
##  2 FALSE    
##  3 FALSE    
##  4 FALSE    
##  5 FALSE    
##  6 FALSE    
##  7 FALSE    
##  8 FALSE    
##  9 FALSE    
## 10 FALSE    
##    url                                                                          
##    <chr>                                                                        
##  1 https://www.yelp.com/biz/edenvale-garden-park-san-jose?adjust_creative=9FUT8…
##  2 https://www.yelp.com/biz/great-oaks-park-san-jose?adjust_creative=9FUT8HLBJS…
##  3 https://www.yelp.com/biz/chynoweth-park-san-jose?adjust_creative=9FUT8HLBJS3…
##  4 https://www.yelp.com/biz/danna-rock-park-san-jose?adjust_creative=9FUT8HLBJS…
##  5 https://www.yelp.com/biz/lake-cunningham-park-san-jose?adjust_creative=9FUT8…
##  6 https://www.yelp.com/biz/nisich-park-san-jose-2?adjust_creative=9FUT8HLBJS3n…
##  7 https://www.yelp.com/biz/welch-park-san-jose?adjust_creative=9FUT8HLBJS3nIJ7…
##  8 https://www.yelp.com/biz/hillview-park-san-jose?adjust_creative=9FUT8HLBJS3n…
##  9 https://www.yelp.com/biz/vieira-park-san-jose?adjust_creative=9FUT8HLBJS3nIJ…
## 10 https://www.yelp.com/biz/lincoln-glen-park-san-jose?adjust_creative=9FUT8HLB…
##    review_count categories         rating transactions phone         
##           <int> <chr>               <dbl> <chr>        <chr>         
##  1           39 Parks                 4   ""           ""            
##  2            6 Parks                 3.5 ""           "+14087935510"
##  3            4 Parks, Playgrounds    3   ""           "+14087935510"
##  4            3 Parks                 2.5 ""           ""            
##  5           95 Parks                 3.5 ""           "+14087935510"
##  6            4 Parks                 4.5 ""           ""            
##  7            7 Parks                 3   ""           ""            
##  8            2 Parks, Playgrounds    4   ""           "+14087935510"
##  9           22 Parks                 4   ""           ""            
## 10           32 Parks                 4   ""           "+14085353570"
##    display_phone    distance price keys  coordinates.latitude
##    <chr>               <dbl> <chr> <chr>                <dbl>
##  1 ""                   526. <NA>  p                     37.3
##  2 "(408) 793-5510"     909. <NA>  p                     37.3
##  3 "(408) 793-5510"     683. <NA>  p                     37.3
##  4 ""                  1082. <NA>  p                     37.3
##  5 "(408) 793-5510"    1346. <NA>  p                     37.3
##  6 ""                  1804. <NA>  p                     37.3
##  7 ""                   689. <NA>  p                     37.3
##  8 "(408) 793-5510"     850. <NA>  p                     37.3
##  9 ""                  1063. <NA>  p                     37.3
## 10 "(408) 535-3570"    1553. <NA>  p                     37.3
##    coordinates.longitude location.address1            location.address2
##                    <dbl> <chr>                        <chr>            
##  1                 -122. 200 Edenvale Ave             ""               
##  2                 -122. 5248 Snow Dr                 ""               
##  3                 -122. Chynoweth Ave & Edenvale Ave ""               
##  4                 -122. 4524-4534 Houndshaven Way    ""               
##  5                 -122. 2305 S White Rd              ""               
##  6                 -122. 1401-1437 Suzay Ct           ""               
##  7                 -122. Kenesta Wy                   ""               
##  8                 -122. Adrian Way & Ocala Ave       ""               
##  9                 -122. 700 Adeline Ave              ""               
## 10                 -122. Radio Ave & Curtner Ave      ""               
##    location.address3 location.city location.zip_code location.country
##    <chr>             <chr>         <chr>             <chr>           
##  1 ""                San Jose      95136             US              
##  2 ""                San Jose      95111             US              
##  3 ""                San Jose      95136             US              
##  4 ""                San Jose      95111             US              
##  5 ""                San Jose      95101             US              
##  6 ""                San Jose      95122             US              
##  7 ""                San Jose      95122             US              
##  8 ""                San Jose      95122             US              
##  9 ""                San Jose      95136             US              
## 10 ""                San Jose      95125             US              
##    location.state location.display_address                        
##    <chr>          <chr>                                           
##  1 CA             200 Edenvale Ave, San Jose, CA 95136            
##  2 CA             5248 Snow Dr, San Jose, CA 95111                
##  3 CA             Chynoweth Ave & Edenvale Ave, San Jose, CA 95136
##  4 CA             4524-4534 Houndshaven Way, San Jose, CA 95111   
##  5 CA             2305 S White Rd, San Jose, CA 95101             
##  6 CA             1401-1437 Suzay Ct, San Jose, CA 95122          
##  7 CA             Kenesta Wy, San Jose, CA 95122                  
##  8 CA             Adrian Way & Ocala Ave, San Jose, CA 95122      
##  9 CA             700 Adeline Ave, San Jose, CA 95136             
## 10 CA             Radio Ave & Curtner Ave, San Jose, CA 95125     
## # … with 39,692 more rows
# Read the full data. here() resolves the file name against the project root,
# so no machine-specific absolute path is needed.
my_yelp <- read_rds(here("yelp_all_4.rds"))

# Issue 2 ------------------------------
# Keep a single row per business id (all other columns are retained)
yelp_unique <- my_yelp %>%
  distinct(id, .keep_all = TRUE)

# Report how many rows the de-duplication removed. This step drops duplicated
# ids, not NAs, so the message says so.
glue::glue(
  "Before dropping duplicates, there were {nrow(my_yelp)} rows. ",
  "After dropping them, there are {nrow(yelp_unique)} rows"
) %>%
  print()
## Before dropping duplicates, there were 39702 rows. After dropping them, there are 5551 rows
# Issue 3 ------------------------------
yelp_flat <- yelp_unique %>%
  # 1. Turn nested data frame columns into regular columns
  jsonlite::flatten() %>%
  # 2. Turn the list-columns into character columns
  mutate(
    # join the elements of each list entry with ", "
    across(
      c(transactions, location.display_address),
      ~ map_chr(.x, function(items) str_c(items, collapse = ", "))
    ),
    # concate_list is the custom function defined earlier in this script
    categories = map_chr(categories, concate_list)
  )
# Issue 4 ------------------------------
# Number of missing values in every column
colSums(is.na(yelp_flat))
##                       id                    alias                     name 
##                        0                        0                        0 
##                image_url                is_closed                      url 
##                        0                        0                        0 
##             review_count               categories                   rating 
##                        0                        0                        0 
##             transactions                    phone            display_phone 
##                        0                        0                        0 
##                 distance                    price                     keys 
##                        0                     1650                        0 
##     coordinates.latitude    coordinates.longitude        location.address1 
##                        2                        2                       73 
##        location.address2        location.address3            location.city 
##                     1146                     1327                        0 
##        location.zip_code         location.country           location.state 
##                        0                        0                        0 
## location.display_address 
##                        0
# First, let's verify that latitude and longitude are missing in exactly the
# same rows (the NA counts above report 2 missing values in each column).
with(
  yelp_flat,
  identical(is.na(coordinates.latitude), is.na(coordinates.longitude))
) # TRUE means the missing rows coincide.
## [1] TRUE
# Remove the rows without coordinates. Filtering on longitude is enough
# because latitude is missing in the same rows.
yelp_dropna1 <- drop_na(yelp_flat, coordinates.longitude)

# Remove the rows that have no price information
yelp_dropna2 <- drop_na(yelp_dropna1, price)
head(yelp_dropna2)
##                       id                                alias
## 1 cToVW-dCXcxhqFQX_xSc-w      orchard-heritage-park-sunnyvale
## 2 Qbuy0qvdlXaXKDOpaDnCBg                  pho-bowl-san-jose-4
## 3 pyCLzdHoqs3ndmGV1vPamA             mingles-mango-san-jose-2
## 4 nm7YhFGOMAw1ojcX32RmpQ tacos-el-pollo-y-el-pollito-san-jose
## 5 Iw0u4il3SpX7rtY0ookQdA puro-michoacan-restaurant-san-jose-2
## 6 WE1O8_MfVq4kM4G1OVAP7g               trines-cafe-6-san-jose
##                          name
## 1       Orchard Heritage Park
## 2                    Pho Bowl
## 3              Mingle's Mango
## 4 Tacos El Pollo Y El Pollito
## 5   Puro Michoacan Restaurant
## 6               Trines Cafe 6
##                                                              image_url
## 1 https://s3-media2.fl.yelpcdn.com/bphoto/HSOJ1H2XNrLBhlATnPHoKg/o.jpg
## 2 https://s3-media2.fl.yelpcdn.com/bphoto/OwRnJ_0kxA_glrocLu1spg/o.jpg
## 3 https://s3-media3.fl.yelpcdn.com/bphoto/QZSPNHxnVyu9KBwDEqbkbw/o.jpg
## 4 https://s3-media2.fl.yelpcdn.com/bphoto/2OcE1RSGx8MEFtfj566Hew/o.jpg
## 5 https://s3-media2.fl.yelpcdn.com/bphoto/tPx6AVzj0Xh7SnKt1jalEQ/o.jpg
## 6 https://s3-media2.fl.yelpcdn.com/bphoto/KFiqnd1ETApqoPgSq_0Ebg/o.jpg
##   is_closed
## 1     FALSE
## 2     FALSE
## 3     FALSE
## 4     FALSE
## 5     FALSE
## 6     FALSE
##                                                                                                                                                                                                 url
## 1      https://www.yelp.com/biz/orchard-heritage-park-sunnyvale?adjust_creative=9FUT8HLBJS3nIJ7hDk8tZw&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=9FUT8HLBJS3nIJ7hDk8tZw
## 2                  https://www.yelp.com/biz/pho-bowl-san-jose-4?adjust_creative=9FUT8HLBJS3nIJ7hDk8tZw&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=9FUT8HLBJS3nIJ7hDk8tZw
## 3             https://www.yelp.com/biz/mingles-mango-san-jose-2?adjust_creative=9FUT8HLBJS3nIJ7hDk8tZw&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=9FUT8HLBJS3nIJ7hDk8tZw
## 4 https://www.yelp.com/biz/tacos-el-pollo-y-el-pollito-san-jose?adjust_creative=9FUT8HLBJS3nIJ7hDk8tZw&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=9FUT8HLBJS3nIJ7hDk8tZw
## 5 https://www.yelp.com/biz/puro-michoacan-restaurant-san-jose-2?adjust_creative=9FUT8HLBJS3nIJ7hDk8tZw&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=9FUT8HLBJS3nIJ7hDk8tZw
## 6               https://www.yelp.com/biz/trines-cafe-6-san-jose?adjust_creative=9FUT8HLBJS3nIJ7hDk8tZw&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=9FUT8HLBJS3nIJ7hDk8tZw
##   review_count                     categories rating     transactions
## 1            8     Museums, Parks, Gift Shops      5                 
## 2          382      Vietnamese, Soup, Noodles      4         delivery
## 3          362 Asian Fusion, Dim Sum, Noodles      4 pickup, delivery
## 4           65           Mexican, Food Trucks      4         delivery
## 5           38                        Mexican      4                 
## 6           68                        Mexican      4         delivery
##          phone  display_phone  distance price keys coordinates.latitude
## 1 +14087490220 (408) 749-0220 1531.2446    $$    p             37.35776
## 2 +14082818288 (408) 281-8288  881.1513     $    a             37.25979
## 3 +14083001425 (408) 300-1425  778.2162    $$    a             37.26058
## 4 +14085092940 (408) 509-2940  702.4299     $    a             37.26053
## 5 +16692347944 (669) 234-7944  262.5733    $$    a             37.26297
## 6 +14082243243 (408) 224-3243  889.1281    $$    a             37.25960
##   coordinates.longitude  location.address1 location.address2 location.address3
## 1             -122.0268 560 E Remington Dr                                <NA>
## 2             -121.8077  5316 Monterey Hwy                                    
## 3             -121.8083      Monterey Rd D             Ste D                  
## 4             -121.8096  5270 Monterey Hwy                                    
## 5             -121.8136  5138 Monterey Hwy             Ste A              <NA>
## 6             -121.8078  5304 Monterey Hwy                                    
##   location.city location.zip_code location.country location.state
## 1     Sunnyvale             94087               US             CA
## 2      San Jose             95111               US             CA
## 3      San Jose             95111               US             CA
## 4      San Jose             95111               US             CA
## 5      San Jose             95111               US             CA
## 6      San Jose             95111               US             CA
##                       location.display_address
## 1      560 E Remington Dr, Sunnyvale, CA 94087
## 2        5316 Monterey Hwy, San Jose, CA 95111
## 3     Monterey Rd D, Ste D, San Jose, CA 95111
## 4        5270 Monterey Hwy, San Jose, CA 95111
## 5 5138 Monterey Hwy, Ste A, San Jose, CA 95111
## 6        5304 Monterey Hwy, San Jose, CA 95111
# Summarise the price column. It is a character column, so summary() only
# reports length, class and mode.
# NOTE(review): this inspects yelp_dropna1, i.e. the table before rows with a
# missing price were dropped -- confirm whether yelp_dropna2 was intended.
summary(yelp_dropna1[["price"]])
##    Length     Class      Mode 
##      5549 character character
# census boundary
#census_a <- st_read("https://raw.githubusercontent.com/BonwooKoo/UrbanAnalytics2022/main/Lab/module_0/testdata.geojson") 
# Download tract-level data for Santa Clara County, CA from the 2019 ACS
# 5-year estimates, in wide format and with geometries attached.
# suppressMessages() hides the messages emitted during the download.
# NOTE(review): the download progress bar is still printed below; it may be
# possible to silence it with `progress_bar = FALSE` -- confirm against the
# tidycensus/tigris documentation.
census_1 <- suppressMessages(
  get_acs(
    geography = "tract", # "block group", "county", "state" etc. also work
    variables = c(
      hhincome = "B19019_001",
      race.tot = "B02001_001",
      race.white = "B02001_002",
      race.black = "B02001_003"
    ),
    state = "CA",
    county = "Santa Clara",
    year = 2019,
    survey = "acs5", # American Community Survey 5-year estimate
    geometry = TRUE, # return an sf object
    output = "wide" # one column per variable instead of long format
  )
)
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |=                                                                     |   1%
  |                                                                            
  |=                                                                     |   2%
  |                                                                            
  |==                                                                    |   2%
  |                                                                            
  |==                                                                    |   3%
  |                                                                            
  |===                                                                   |   4%
  |                                                                            
  |===                                                                   |   5%
  |                                                                            
  |====                                                                  |   5%
  |                                                                            
  |====                                                                  |   6%
  |                                                                            
  |=====                                                                 |   7%
  |                                                                            
  |=====                                                                 |   8%
  |                                                                            
  |======                                                                |   8%
  |                                                                            
  |======                                                                |   9%
  |                                                                            
  |=======                                                               |   9%
  |                                                                            
  |=======                                                               |  10%
  |                                                                            
  |========                                                              |  11%
  |                                                                            
  |========                                                              |  12%
  |                                                                            
  |=========                                                             |  13%
  |                                                                            
  |==========                                                            |  14%
  |                                                                            
  |==========                                                            |  15%
  |                                                                            
  |===========                                                           |  15%
  |                                                                            
  |===========                                                           |  16%
  |                                                                            
  |============                                                          |  17%
  |                                                                            
  |============                                                          |  18%
  |                                                                            
  |=============                                                         |  18%
  |                                                                            
  |=============                                                         |  19%
  |                                                                            
  |==============                                                        |  19%
  |                                                                            
  |==============                                                        |  20%
  |                                                                            
  |===============                                                       |  21%
  |                                                                            
  |===============                                                       |  22%
  |                                                                            
  |================                                                      |  23%
  |                                                                            
  |=================                                                     |  24%
  |                                                                            
  |=================                                                     |  25%
  |                                                                            
  |==================                                                    |  26%
  |                                                                            
  |===================                                                   |  27%
  |                                                                            
  |===================                                                   |  28%
  |                                                                            
  |====================                                                  |  28%
  |                                                                            
  |====================                                                  |  29%
  |                                                                            
  |=====================                                                 |  30%
  |                                                                            
  |======================                                                |  31%
  |                                                                            
  |======================                                                |  32%
  |                                                                            
  |=======================                                               |  33%
  |                                                                            
  |========================                                              |  34%
  |                                                                            
  |========================                                              |  35%
  |                                                                            
  |=========================                                             |  36%
  |                                                                            
  |==========================                                            |  37%
  |                                                                            
  |==========================                                            |  38%
  |                                                                            
  |===========================                                           |  38%
  |                                                                            
  |===========================                                           |  39%
  |                                                                            
  |============================                                          |  40%
  |                                                                            
  |=============================                                         |  41%
  |                                                                            
  |=============================                                         |  42%
  |                                                                            
  |==============================                                        |  43%
  |                                                                            
  |===============================                                       |  44%
  |                                                                            
  |===============================                                       |  45%
  |                                                                            
  |================================                                      |  45%
  |                                                                            
  |================================                                      |  46%
  |                                                                            
  |=================================                                     |  47%
  |                                                                            
  |=================================                                     |  48%
  |                                                                            
  |==================================                                    |  48%
  |                                                                            
  |==================================                                    |  49%
  |                                                                            
  |===================================                                   |  50%
  |                                                                            
  |====================================                                  |  51%
  |                                                                            
  |====================================                                  |  52%
  |                                                                            
  |=====================================                                 |  53%
  |                                                                            
  |======================================                                |  54%
  |                                                                            
  |======================================                                |  55%
  |                                                                            
  |=======================================                               |  56%
  |                                                                            
  |========================================                              |  57%
  |                                                                            
  |========================================                              |  58%
  |                                                                            
  |=========================================                             |  58%
  |                                                                            
  |=========================================                             |  59%
  |                                                                            
  |==========================================                            |  60%
  |                                                                            
  |===========================================                           |  61%
  |                                                                            
  |===========================================                           |  62%
  |                                                                            
  |============================================                          |  63%
  |                                                                            
  |=============================================                         |  64%
  |                                                                            
  |=============================================                         |  65%
  |                                                                            
  |==============================================                        |  66%
  |                                                                            
  |===============================================                       |  67%
  |                                                                            
  |===============================================                       |  68%
  |                                                                            
  |================================================                      |  68%
  |                                                                            
  |================================================                      |  69%
  |                                                                            
  |=================================================                     |  70%
  |                                                                            
  |==================================================                    |  71%
  |                                                                            
  |==================================================                    |  72%
  |                                                                            
  |===================================================                   |  73%
  |                                                                            
  |====================================================                  |  74%
  |                                                                            
  |====================================================                  |  75%
  |                                                                            
  |=====================================================                 |  75%
  |                                                                            
  |=====================================================                 |  76%
  |                                                                            
  |======================================================                |  77%
  |                                                                            
  |======================================================                |  78%
  |                                                                            
  |=======================================================               |  78%
  |                                                                            
  |=======================================================               |  79%
  |                                                                            
  |========================================================              |  80%
  |                                                                            
  |========================================================              |  81%
  |                                                                            
  |=========================================================             |  81%
  |                                                                            
  |=========================================================             |  82%
  |                                                                            
  |==========================================================            |  83%
  |                                                                            
  |===========================================================           |  84%
  |                                                                            
  |===========================================================           |  85%
  |                                                                            
  |============================================================          |  86%
  |                                                                            
  |=============================================================         |  87%
  |                                                                            
  |=============================================================         |  88%
  |                                                                            
  |==============================================================        |  88%
  |                                                                            
  |==============================================================        |  89%
  |                                                                            
  |===============================================================       |  90%
  |                                                                            
  |================================================================      |  91%
  |                                                                            
  |================================================================      |  92%
  |                                                                            
  |=================================================================     |  93%
  |                                                                            
  |==================================================================    |  94%
  |                                                                            
  |==================================================================    |  95%
  |                                                                            
  |===================================================================   |  96%
  |                                                                            
  |====================================================================  |  97%
  |                                                                            
  |====================================================================  |  98%
  |                                                                            
  |===================================================================== |  98%
  |                                                                            
  |===================================================================== |  99%
  |                                                                            
  |======================================================================| 100%
# Split NAME ("<tract>, <county>, <state>") into three columns at each comma.
# Only the comma itself is consumed, so `county` and `state` keep the space
# that followed it (e.g. " Santa Clara County").
census_1_s <- census_1 %>%
  separate(
    col = NAME,
    into = c("tract", "county", "state"),
    sep = ","
  )

census_1_s %>% head()
## Simple feature collection with 6 features and 12 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -121.8832 ymin: 37.2602 xmax: -121.7828 ymax: 37.34074
## Geodetic CRS:  NAD83
##         GEOID                tract              county       state hhincomeE
## 1 06085512042 Census Tract 5120.42  Santa Clara County  California     78382
## 2 06085503306 Census Tract 5033.06  Santa Clara County  California     87361
## 3 06085503108 Census Tract 5031.08  Santa Clara County  California    111618
## 4 06085503323 Census Tract 5033.23  Santa Clara County  California    135877
## 5 06085503401 Census Tract 5034.01  Santa Clara County  California     79318
## 6 06085512020 Census Tract 5120.20  Santa Clara County  California    102401
##   hhincomeM race.totE race.totM race.whiteE race.whiteM race.blackE race.blackM
## 1      3798      3020       292         878         173         151          79
## 2     14909      4373       379        1349         443          86          71
## 3     23929      8222       627        3301         619         637         211
## 4     14938      4763       402        1308         458         251         155
## 5     17404      4468       431        1202         366          75          65
## 6     10813      7825       879        4039         902         167         106
##                         geometry
## 1 MULTIPOLYGON (((-121.82 37....
## 2 MULTIPOLYGON (((-121.8318 3...
## 3 MULTIPOLYGON (((-121.8832 3...
## 4 MULTIPOLYGON (((-121.8006 3...
## 5 MULTIPOLYGON (((-121.8455 3...
## 6 MULTIPOLYGON (((-121.8419 3...
# Turn the Yelp table into point features built from its longitude/latitude
# columns, declared as WGS 84 (EPSG:4326).
yelp_sf <- yelp_dropna1 %>%
  st_as_sf(
    coords = c("coordinates.longitude", "coordinates.latitude"),
    crs = 4326
  )

# Dissolve the Santa Clara County tracts into a single boundary, expressed in
# the same CRS as the points. trimws() keeps the match working whether or not
# `county` still carries the leading space left behind by separate().
county_boundary <- census_1_s %>%
  st_transform(4326) %>%
  filter(trimws(county) == "Santa Clara County") %>%
  st_union()

# sf subsets: keep only the points that intersect the county boundary.
yelp_in <- yelp_sf[county_boundary, , op = st_intersects]
nrow(yelp_in)
## [1] 5291
# Report how the row and column counts differ between `my_yelp` (created
# earlier in the file) and the spatially filtered points in `yelp_in`.
size_report <- glue::glue("nrow before: {nrow(my_yelp)} -> nrow after: {nrow(yelp_in)} \n
            ncol before: {ncol(my_yelp)} -> ncol after: {ncol(yelp_in)} \n")
print(size_report)
## nrow before: 39702 -> nrow after: 5291 
## 
## ncol before: 17 -> ncol after: 24
# Visualize: put tmap into its interactive ("view") mode.
tmap_mode(mode = "view")
## tmap mode set to interactive viewing
# Draw every retained business as a dot, colored by its `price` column.
tm_shape(shp = yelp_in) +
  tm_dots(col = "price")
# get_acs(geometry = TRUE) already returned an sf object (in NAD83), so no
# conversion is needed. Reproject it to WGS 84 (EPSG:4326) so it shares a CRS
# with the Yelp points. Assigning `st_crs(x) <- 4326` would only relabel the
# coordinates without transforming them.
census_sf <- census_1 %>% st_transform(4326)
## Warning: st_crs<- : replacing crs does not reproject data; use st_transform for
## that
# Spatial joins, once in each direction.
# Tract polygons on the left: a tract is repeated once for every business
# point that intersects it.
census_yelp <- census_sf %>%
  st_join(yelp_in, join = st_intersects)

# Business points on the left: each point gains the columns of the tract it
# intersects.
yelp_census <- yelp_in %>%
  st_join(census_sf, join = st_intersects)

# Preview the polygon-side result.
head(census_yelp)
## Simple feature collection with 6 features and 33 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -121.8318 ymin: 37.2602 xmax: -121.8083 ymax: 37.34074
## Geodetic CRS:  WGS 84
##           GEOID                                                 NAME hhincomeE
## 1   06085512042 Census Tract 5120.42, Santa Clara County, California     78382
## 2   06085503306 Census Tract 5033.06, Santa Clara County, California     87361
## 2.1 06085503306 Census Tract 5033.06, Santa Clara County, California     87361
## 2.2 06085503306 Census Tract 5033.06, Santa Clara County, California     87361
## 2.3 06085503306 Census Tract 5033.06, Santa Clara County, California     87361
## 2.4 06085503306 Census Tract 5033.06, Santa Clara County, California     87361
##     hhincomeM race.totE race.totM race.whiteE race.whiteM race.blackE
## 1        3798      3020       292         878         173         151
## 2       14909      4373       379        1349         443          86
## 2.1     14909      4373       379        1349         443          86
## 2.2     14909      4373       379        1349         443          86
## 2.3     14909      4373       379        1349         443          86
## 2.4     14909      4373       379        1349         443          86
##     race.blackM                     id
## 1            79 Iw0u4il3SpX7rtY0ookQdA
## 2            71 jcxoXMWrqreHQT0P5h1s9g
## 2.1          71 lZUd_rDdO5FyZQZfKjGqZw
## 2.2          71 4XaESbCqIsmETYxpPiv9Lg
## 2.3          71 NlYprq0SB1tNdgWv1GpXdA
## 2.4          71 smFno58h21Rd4RihK87xvw
##                                                     alias
## 1                    puro-michoacan-restaurant-san-jose-2
## 2                                     welch-park-san-jose
## 2.1                          cha-ca-long-phung-san-jose-2
## 2.2                      mexican-style-churros-san-jose-2
## 2.3 que-ta-banh-canh-trang-bang-udon-noodle-soup-san-jose
## 2.4                              taste-of-persia-san-jose
##                                               name
## 1                        Puro Michoacan Restaurant
## 2                                       Welch Park
## 2.1                              Cha Ca Long Phung
## 2.2                          Mexican Style Churros
## 2.3 Que Ta Banh Canh Trang Bang - Udon Noodle Soup
## 2.4                                Taste of Persia
##                                                                image_url
## 1   https://s3-media2.fl.yelpcdn.com/bphoto/tPx6AVzj0Xh7SnKt1jalEQ/o.jpg
## 2   https://s3-media4.fl.yelpcdn.com/bphoto/g5mj_1kJMixlfUYA2AgfWg/o.jpg
## 2.1 https://s3-media1.fl.yelpcdn.com/bphoto/HF_-0eyrZQ9_-lKVjNpUyg/o.jpg
## 2.2 https://s3-media3.fl.yelpcdn.com/bphoto/L4IDuy_D2wpBwiSC0P6qzw/o.jpg
## 2.3 https://s3-media1.fl.yelpcdn.com/bphoto/ykTpTBey8xmauZpnv3s22A/o.jpg
## 2.4 https://s3-media2.fl.yelpcdn.com/bphoto/6g5XM0Pgi3qIYv9PnkAESw/o.jpg
##     is_closed
## 1       FALSE
## 2       FALSE
## 2.1     FALSE
## 2.2     FALSE
## 2.3     FALSE
## 2.4     FALSE
##                                                                                                                                                                                                                    url
## 1                    https://www.yelp.com/biz/puro-michoacan-restaurant-san-jose-2?adjust_creative=9FUT8HLBJS3nIJ7hDk8tZw&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=9FUT8HLBJS3nIJ7hDk8tZw
## 2                                     https://www.yelp.com/biz/welch-park-san-jose?adjust_creative=9FUT8HLBJS3nIJ7hDk8tZw&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=9FUT8HLBJS3nIJ7hDk8tZw
## 2.1                          https://www.yelp.com/biz/cha-ca-long-phung-san-jose-2?adjust_creative=9FUT8HLBJS3nIJ7hDk8tZw&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=9FUT8HLBJS3nIJ7hDk8tZw
## 2.2                      https://www.yelp.com/biz/mexican-style-churros-san-jose-2?adjust_creative=9FUT8HLBJS3nIJ7hDk8tZw&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=9FUT8HLBJS3nIJ7hDk8tZw
## 2.3 https://www.yelp.com/biz/que-ta-banh-canh-trang-bang-udon-noodle-soup-san-jose?adjust_creative=9FUT8HLBJS3nIJ7hDk8tZw&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=9FUT8HLBJS3nIJ7hDk8tZw
## 2.4                              https://www.yelp.com/biz/taste-of-persia-san-jose?adjust_creative=9FUT8HLBJS3nIJ7hDk8tZw&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=9FUT8HLBJS3nIJ7hDk8tZw
##     review_count                     categories rating     transactions
## 1             38                        Mexican    4.0                 
## 2              7                          Parks    3.0                 
## 2.1          200                     Vietnamese    4.0                 
## 2.2           52              Desserts, Mexican    5.0                 
## 2.3          192   Vietnamese, Seafood, Noodles    3.5 pickup, delivery
## 2.4           82 Mediterranean, Persian/Iranian    4.0 pickup, delivery
##            phone  display_phone distance price keys   location.address1
## 1   +16692347944 (669) 234-7944 262.5733    $$    a   5138 Monterey Hwy
## 2                               689.0162  <NA>    p          Kenesta Wy
## 2.1 +14082549941 (408) 254-9941 533.5555     $    a       2145 Tully Rd
## 2.2 +14085129594 (408) 512-9594 757.5121     $    a 1812 Cunningham Ave
## 2.3 +14082591445 (408) 259-1445 603.5719    $$    a       2005 Tully Rd
## 2.4 +14084935978 (408) 493-5978 597.6317    $$    a       2011 Tully Rd
##     location.address2 location.address3 location.city location.zip_code
## 1               Ste A              <NA>      San Jose             95111
## 2                                            San Jose             95122
## 2.1                                          San Jose             95122
## 2.2                                <NA>      San Jose             95122
## 2.3              <NA>                        San Jose             95122
## 2.4                                <NA>      San Jose             95122
##     location.country location.state
## 1                 US             CA
## 2                 US             CA
## 2.1               US             CA
## 2.2               US             CA
## 2.3               US             CA
## 2.4               US             CA
##                         location.display_address                       geometry
## 1   5138 Monterey Hwy, Ste A, San Jose, CA 95111 MULTIPOLYGON (((-121.82 37....
## 2                 Kenesta Wy, San Jose, CA 95122 MULTIPOLYGON (((-121.8318 3...
## 2.1            2145 Tully Rd, San Jose, CA 95122 MULTIPOLYGON (((-121.8318 3...
## 2.2      1812 Cunningham Ave, San Jose, CA 95122 MULTIPOLYGON (((-121.8318 3...
## 2.3            2005 Tully Rd, San Jose, CA 95122 MULTIPOLYGON (((-121.8318 3...
## 2.4            2011 Tully Rd, San Jose, CA 95122 MULTIPOLYGON (((-121.8318 3...
# Preview the point-side join result.
head(yelp_census)
## Simple feature collection with 6 features and 33 fields
## Geometry type: POINT
## Dimension:     XY
## Bounding box:  xmin: -121.8339 ymin: 37.25949 xmax: -121.8057 ymax: 37.33745
## Geodetic CRS:  WGS 84
##                       id                         alias                 name
## 1 PkOM7wJZzZ0DoxW84_uLDg edenvale-garden-park-san-jose Edenvale Garden Park
## 2 j-gBxd5Nkhr9iRHi_NJp7w      great-oaks-park-san-jose      Great Oaks Park
## 3 C41iNUrHTWYN9rUJSn_rJw       chynoweth-park-san-jose       Chynoweth Park
## 4 wvUCOyYuNcsc7g3OsMxWiA      danna-rock-park-san-jose      Danna Rock Park
## 5 sJ0RCNgqZ4nlLMl4LhKDmQ lake-cunningham-park-san-jose Lake Cunningham Park
## 6 R6Eb_p72vynnpq20aOLATg        nisich-park-san-jose-2          Nisich Park
##                                                              image_url
## 1 https://s3-media4.fl.yelpcdn.com/bphoto/yFTWpEtAU5xib85UvVq1pQ/o.jpg
## 2 https://s3-media4.fl.yelpcdn.com/bphoto/rcIYKSNUQ84vatIfw6_h7A/o.jpg
## 3 https://s3-media1.fl.yelpcdn.com/bphoto/gx6Ei11wytx_V215HF1gzg/o.jpg
## 4 https://s3-media3.fl.yelpcdn.com/bphoto/F0AQ22xR2eMrDiImNn66fA/o.jpg
## 5 https://s3-media3.fl.yelpcdn.com/bphoto/ZzHo_dwM5ar6CKV6TsxshQ/o.jpg
## 6 https://s3-media3.fl.yelpcdn.com/bphoto/ECTqvmIlk5LyKxKM7A-Row/o.jpg
##   is_closed
## 1     FALSE
## 2     FALSE
## 3     FALSE
## 4     FALSE
## 5     FALSE
## 6     FALSE
##                                                                                                                                                                                          url
## 1 https://www.yelp.com/biz/edenvale-garden-park-san-jose?adjust_creative=9FUT8HLBJS3nIJ7hDk8tZw&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=9FUT8HLBJS3nIJ7hDk8tZw
## 2      https://www.yelp.com/biz/great-oaks-park-san-jose?adjust_creative=9FUT8HLBJS3nIJ7hDk8tZw&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=9FUT8HLBJS3nIJ7hDk8tZw
## 3       https://www.yelp.com/biz/chynoweth-park-san-jose?adjust_creative=9FUT8HLBJS3nIJ7hDk8tZw&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=9FUT8HLBJS3nIJ7hDk8tZw
## 4      https://www.yelp.com/biz/danna-rock-park-san-jose?adjust_creative=9FUT8HLBJS3nIJ7hDk8tZw&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=9FUT8HLBJS3nIJ7hDk8tZw
## 5 https://www.yelp.com/biz/lake-cunningham-park-san-jose?adjust_creative=9FUT8HLBJS3nIJ7hDk8tZw&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=9FUT8HLBJS3nIJ7hDk8tZw
## 6        https://www.yelp.com/biz/nisich-park-san-jose-2?adjust_creative=9FUT8HLBJS3nIJ7hDk8tZw&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=9FUT8HLBJS3nIJ7hDk8tZw
##   review_count         categories rating transactions        phone
## 1           39              Parks    4.0                          
## 2            6              Parks    3.5              +14087935510
## 3            4 Parks, Playgrounds    3.0              +14087935510
## 4            3              Parks    2.5                          
## 5           95              Parks    3.5              +14087935510
## 6            4              Parks    4.5                          
##    display_phone  distance price keys            location.address1
## 1                 526.0228  <NA>    p             200 Edenvale Ave
## 2 (408) 793-5510  909.4110  <NA>    p                 5248 Snow Dr
## 3 (408) 793-5510  682.7830  <NA>    p Chynoweth Ave & Edenvale Ave
## 4                1082.4227  <NA>    p    4524-4534 Houndshaven Way
## 5 (408) 793-5510 1346.1879  <NA>    p              2305 S White Rd
## 6                1803.6135  <NA>    p           1401-1437 Suzay Ct
##   location.address2 location.address3 location.city location.zip_code
## 1                                          San Jose             95136
## 2                                          San Jose             95111
## 3                                          San Jose             95136
## 4                                          San Jose             95111
## 5                                          San Jose             95101
## 6                                          San Jose             95122
##   location.country location.state
## 1               US             CA
## 2               US             CA
## 3               US             CA
## 4               US             CA
## 5               US             CA
## 6               US             CA
##                           location.display_address       GEOID
## 1             200 Edenvale Ave, San Jose, CA 95136 06085512021
## 2                 5248 Snow Dr, San Jose, CA 95111 06085512017
## 3 Chynoweth Ave & Edenvale Ave, San Jose, CA 95136 06085512021
## 4    4524-4534 Houndshaven Way, San Jose, CA 95111 06085512017
## 5              2305 S White Rd, San Jose, CA 95101 06085503321
## 6           1401-1437 Suzay Ct, San Jose, CA 95122 06085503111
##                                                   NAME hhincomeE hhincomeM
## 1 Census Tract 5120.21, Santa Clara County, California    130460      7700
## 2 Census Tract 5120.17, Santa Clara County, California     92855     17920
## 3 Census Tract 5120.21, Santa Clara County, California    130460      7700
## 4 Census Tract 5120.17, Santa Clara County, California     92855     17920
## 5 Census Tract 5033.21, Santa Clara County, California    156667     28278
## 6 Census Tract 5031.11, Santa Clara County, California     99423     18943
##   race.totE race.totM race.whiteE race.whiteM race.blackE race.blackM
## 1      6491       502        3331         469         275         225
## 2      7565       781        3316         634         196         175
## 3      6491       502        3331         469         275         225
## 4      7565       781        3316         634         196         175
## 5      4690       355         419         182          44          49
## 6      5132       483        1167         444          17          27
##                     geometry
## 1 POINT (-121.8203 37.26251)
## 2 POINT (-121.8057 37.26807)
## 3 POINT (-121.8188 37.25949)
## 4 POINT (-121.8234 37.27216)
## 5 POINT (-121.8089 37.33745)
## 6 POINT (-121.8339 37.31825)
# Collapse census_yelp to one row per GEOID holding the mean rating, then
# draw the result as polygons shaded by quantile classes of that mean.
census_yelp %>%
  group_by(GEOID) %>%
  summarise(rating = mean(rating)) %>%
  tm_shape() +
  tm_polygons(col = "rating", style = "quantile")
# Draw every feature of yelp_census as a dot coloured by its hhincomeE value.
yelp_census %>%
  tm_shape() +
  tm_dots(col = "hhincomeE")
# Recode review_count into a two-level label: more than 1000 reviews is
# "many", anything else is "few". mutate() is used because the label is
# stored as a new column next to the original one.
yelp_in %>%
  mutate(
    review_count_binary = if_else(review_count > 1000, "many", "few")
  ) %>%
  # Keep only the source column and its recoded version for a compact preview
  select(review_count, review_count_binary) %>%
  head()
## Simple feature collection with 6 features and 2 fields
## Geometry type: POINT
## Dimension:     XY
## Bounding box:  xmin: -121.8339 ymin: 37.25949 xmax: -121.8057 ymax: 37.33745
## Geodetic CRS:  WGS 84
##   review_count review_count_binary                   geometry
## 1           39                 few POINT (-121.8203 37.26251)
## 2            6                 few POINT (-121.8057 37.26807)
## 3            4                 few POINT (-121.8188 37.25949)
## 4            3                 few POINT (-121.8234 37.27216)
## 5           95                 few POINT (-121.8089 37.33745)
## 6            4                 few POINT (-121.8339 37.31825)
# Standardise every numeric column of yelp_in with scale() (centre and divide
# by the standard deviation by default), then keep only those numeric columns.
# Predicate functions such as is.numeric must be wrapped in where() when used
# for column selection; passing the bare predicate is deprecated and raises a
# "Predicate functions must be wrapped in `where()`" warning.
yelp_in %>%
  mutate(across(where(is.numeric), scale)) %>%
  select(where(is.numeric))
## Warning: Predicate functions must be wrapped in `where()`.
## 
##   # Bad
##   data %>% select(is.numeric)
## 
##   # Good
##   data %>% select(where(is.numeric))
## 
## ℹ Please update your code.
## This message is displayed once per session.
## Simple feature collection with 5291 features and 3 fields
## Geometry type: POINT
## Dimension:     XY
## Bounding box:  xmin: -122.1847 ymin: 36.95046 xmax: -121.52 ymax: 37.45962
## Geodetic CRS:  WGS 84
## First 10 features:
##    review_count     rating    distance                   geometry
## 1    -0.5120726  0.4815183 -0.50796146 POINT (-121.8203 37.26251)
## 2    -0.5844259 -0.1124927 -0.32989489 POINT (-121.8057 37.26807)
## 3    -0.5888110 -0.7065038 -0.43515339 POINT (-121.8188 37.25949)
## 4    -0.5910035 -1.3005149 -0.24953872 POINT (-121.8234 37.27216)
## 5    -0.3892912 -0.1124927 -0.12703162 POINT (-121.8089 37.33745)
## 6    -0.5888110  1.0755294  0.08542202 POINT (-121.8339 37.31825)
## 7    -0.5822334 -0.7065038 -0.43225838  POINT (-121.8239 37.3255)
## 8    -0.5931960  0.4815183 -0.35756898 POINT (-121.8254 37.33876)
## 9    -0.5493455  0.4815183 -0.25870205 POINT (-121.8614 37.28697)
## 10   -0.5274203  0.4815183 -0.03095143 POINT (-121.8896 37.28915)