R Markdown

This is an R Markdown document for Mini 2!

# API credentials ----
# Secrets are read from environment variables (e.g. set in ~/.Renviron)
# instead of being hard-coded, so they never end up in the source file or in
# the rendered output.
# NOTE(review): the keys that used to be embedded here should be treated as
# exposed and rotated.
client_secret <- Sys.getenv("YELP_API_KEY")
if (!nzchar(client_secret)) {
  warning("YELP_API_KEY is not set; Yelp API requests will fail.",
          call. = FALSE)
}

# Check that the Census key is available without echoing its value.
if (!nzchar(Sys.getenv("CENSUS_API_KEY"))) {
  warning("CENSUS_API_KEY is not set; Census API requests may fail.",
          call. = FALSE)
}
#state_pop <- get_decennial(geography = "state", variables = "P001001")
#head(state_pop)
# Download tract-level ACS 5-year (2019) estimates for Somerset County, NJ.
# `geography` could also be "block group", "county", "state", etc.
# Messages emitted by get_acs() are silenced.
tract <- suppressMessages(
  get_acs(
    geography = "tract",
    state = "NJ",
    county = "Somerset",
    # Named vector: the names become the column prefixes of the wide output
    variables = c(
      hhincome = "B19019_001",
      race.tot = "B02001_001",
      race.white = "B02001_002",
      race.black = "B02001_003"
    ),
    year = 2019,
    survey = "acs5",   # American Community Survey 5-year estimate
    geometry = TRUE,   # return an sf object
    output = "wide"    # one column per variable instead of long format
  )
)
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |==                                                                    |   2%
  |                                                                            
  |===                                                                   |   4%
  |                                                                            
  |===                                                                   |   5%
  |                                                                            
  |=====                                                                 |   7%
  |                                                                            
  |======                                                                |   9%
  |                                                                            
  |========                                                              |  11%
  |                                                                            
  |=========                                                             |  13%
  |                                                                            
  |==========                                                            |  15%
  |                                                                            
  |============                                                          |  17%
  |                                                                            
  |=============                                                         |  18%
  |                                                                            
  |==============                                                        |  20%
  |                                                                            
  |================                                                      |  23%
  |                                                                            
  |=================                                                     |  24%
  |                                                                            
  |===================                                                   |  27%
  |                                                                            
  |=====================                                                 |  29%
  |                                                                            
  |======================                                                |  32%
  |                                                                            
  |=======================                                               |  33%
  |                                                                            
  |========================                                              |  35%
  |                                                                            
  |=========================                                             |  36%
  |                                                                            
  |===========================                                           |  38%
  |                                                                            
  |============================                                          |  41%
  |                                                                            
  |=============================                                         |  42%
  |                                                                            
  |================================                                      |  45%
  |                                                                            
  |=================================                                     |  47%
  |                                                                            
  |===================================                                   |  50%
  |                                                                            
  |====================================                                  |  51%
  |                                                                            
  |======================================                                |  54%
  |                                                                            
  |========================================                              |  56%
  |                                                                            
  |==========================================                            |  60%
  |                                                                            
  |============================================                          |  63%
  |                                                                            
  |==============================================                        |  66%
  |                                                                            
  |================================================                      |  69%
  |                                                                            
  |===================================================                   |  72%
  |                                                                            
  |======================================================                |  77%
  |                                                                            
  |=======================================================               |  78%
  |                                                                            
  |=========================================================             |  81%
  |                                                                            
  |===========================================================           |  84%
  |                                                                            
  |=============================================================         |  87%
  |                                                                            
  |===============================================================       |  90%
  |                                                                            
  |=================================================================     |  93%
  |                                                                            
  |===================================================================   |  96%
  |                                                                            
  |======================================================================| 100%
# Report the dimensions of the downloaded table
message("nrow: ", nrow(tract), ", ncol: ", ncol(tract))
## nrow: 68, ncol: 11
#print(tract)
# Keep only the estimate columns and give them shorter names
# (new name = old name)
tract <- select(
  tract,
  GEOID,
  county = NAME,
  hhincome = hhincomeE,
  race.tot = race.totE,
  race.white = race.whiteE,
  race.black = race.blackE
)

#print(tract)
#print tract after
# Switch tmap to interactive mode and draw the tract outlines
tmap_mode(mode = "view")
## tmap mode set to interactive viewing
tm_shape(shp = tract) + tm_borders()

Draw Tracts

# Function: Get tract-wise radius
get_r <- function(poly, epsg_id, scale = 1.2){
  #---------------------
  # Takes:
  #   poly    - a single POLYGON or LINESTRING geometry
  #   epsg_id - CRS assigned to the points built from the bounding box
  #             (i.e. the CRS the coordinates of `poly` are expressed in)
  #   scale   - multiplier applied to the centre-to-corner distance
  #             (default 1.2, the factor previously hard-coded here)
  # Outputs: a one-row sf object whose geometry is the centre of the bounding
  #   box of `poly`, with a `radius` column holding the scaled distance
  #   between that centre and a corner of the bounding box
  #---------------------
  stopifnot(is.numeric(scale), length(scale) == 1)

  # Get bounding box of a given polygon
  bb <- st_bbox(poly)
  # Coordinates of one corner of the bounding box (its 1st and 2nd elements).
  bb_corner <- st_point(c(bb[1], bb[2])) %>% st_sfc(crs = epsg_id)
  # Centre of the bounding box: midpoint of each coordinate pair
  bb_center_x <- (bb[3]+bb[1])/2
  bb_center_y <- (bb[4]+bb[2])/2
  bb_center <- st_point(c(bb_center_x, bb_center_y)) %>% st_sfc(crs = epsg_id) %>% st_sf()

  # Distance between the corner and the centre
  r <- st_distance(bb_corner, bb_center)
  # Enlarge the distance so the circle is a bit larger than the Census Tract.
  # See the Yelp explanation of their radius parameter to see why we do this.
  bb_center$radius <- r*scale
  bb_center
}
## Using a loop -----------------------------------------------------------------
# CRS used for the radius computation.
# Edit (9/8/2022): 4326 measures distance in meter. Before the edit, I used 26967.
epsg_id <- 4326

# Transform the tract geometries once, up front, instead of re-projecting the
# whole table on every loop iteration.
tract_geom <- tract %>%
  st_transform(crs = epsg_id) %>%
  st_geometry()

# Pre-allocated list; slot i receives the one-row result for tract i.
r4all_loop <- vector("list", nrow(tract))

# seq_len() keeps the loop from running when `tract` has zero rows
# (1:nrow(tract) would iterate over c(1, 0)).
for (i in seq_len(nrow(tract))){
  r4all_loop[[i]] <- get_r(tract_geom[[i]], epsg_id = epsg_id)
}

r4all_loop <- bind_rows(r4all_loop)


# Using a functional -----------------------------------------------------------
# We use a functional (lapply) to apply this custom function to each Census Tract.
r4all_apply <- lapply(tract_geom, function(x) get_r(x, epsg_id = epsg_id))

r4all_apply <- bind_rows(r4all_apply)

# Are these two identical?
identical(r4all_apply, r4all_loop)
## [1] TRUE
# Add the centre coordinates as plain columns: x = 1st coordinate column,
# y = 2nd coordinate column of st_coordinates().
ready_4_yelp <- mutate(
  r4all_apply,
  x = st_coordinates(r4all_apply)[, 1],
  y = st_coordinates(r4all_apply)[, 2]
)
tmap_mode("view")
## tmap mode set to interactive viewing
# Visual check on the first 10 rows:
#   red  = buffer around each centre point, using the radius computed above
#   blue = outline of the original tract polygon
tm_shape(
  st_buffer(ready_4_yelp[1:10, ], dist = ready_4_yelp[1:10, ]$radius)
) +
  tm_polygons(alpha = 0.5, col = "red") +
  tm_shape(tract[1:10, ]) +
  tm_borders(col = "blue")
# Row of ready_4_yelp used for the trial requests
which_tract <- 1
# Trial request 1: first page of 'farms' businesses around the chosen centre
test <- business_search(
  api_key = client_secret,                          # Yelp API key defined earlier
  categories = "farms",                             # category filter
  latitude = ready_4_yelp$y[which_tract],
  longitude = ready_4_yelp$x[which_tract],
  offset = 0,                                       # 1st page, 1st obs
  radius = round(ready_4_yelp$radius[which_tract]), # radius requires integer value
  limit = 50                                        # businesses per page
)
## No encoding supplied: defaulting to UTF-8.
# Trial request 2: same centre and radius, 'restaurants' category
test1 <- business_search(
  api_key = client_secret,
  categories = "restaurants",
  latitude = ready_4_yelp$y[which_tract],
  longitude = ready_4_yelp$x[which_tract],
  offset = 0,
  radius = round(ready_4_yelp$radius[which_tract]),
  limit = 50
)
## No encoding supplied: defaulting to UTF-8.
# Peek at the top of every element of the 'farms' response
lapply(test, head)
## $businesses
## list()
## 
## $total
## [1] 0
## 
## $region
## $region$center
## $region$center$longitude
## [1] -74.43781
## 
## $region$center$latitude
## [1] 40.61849
# Top-level elements of the response
names(test)
## [1] "businesses" "total"      "region"
# Summarise the shape of the `businesses` element in one string
paste0(
  "is it a data.frame?: ", is.data.frame(test[["businesses"]]), ", ",
  " how many rows?: ", nrow(test[["businesses"]]), ", ",
  " how many columns?: ", ncol(test[["businesses"]])
)
## [1] "is it a data.frame?: FALSE,  how many rows?: ,  how many columns?: "
# FUNCTION
get_yelp <- function(tract, category, api_key = client_secret){
  # ----------------------------------
  # Takes:
  #   tract    - one row of tract information (1,) providing `x` (passed as
  #              longitude), `y` (passed as latitude) and `radius`
  #   category - category name (str) passed to `categories`
  #   api_key  - Yelp API key; defaults to the global `client_secret`
  # Outputs:
  #   * total < 1000: all pages of businesses row-bound into one data frame
  #   * total >= 1000: a list whose first slot holds the first page and whose
  #     first name is the number of pages that would be required; no further
  #     requests are made, because the search area has to be made smaller
  # Stops with an error when the response carries no `total` field.
  # ----------------------------------
  page_size <- 50

  # One request for page `page` (1-based); offset is 0 for the first page.
  fetch_page <- function(page){
    business_search(api_key = api_key,
                    categories = category,
                    latitude = tract$y,
                    longitude = tract$x,
                    offset = (page - 1) * page_size,
                    radius = round(tract$radius),
                    limit = page_size)
  }

  # First request --------------------------------------------------------------
  n <- 1
  resp <- fetch_page(n)
  # NOTE(review): presumably a failed request comes back without `total`;
  # fail with a clear message instead of an obscure length error below.
  if (is.null(resp$total)){
    stop("Yelp response has no `total` field; check the API key and the request parameters.",
         call. = FALSE)
  }
  # Calculate how many requests are needed in total
  required_n <- ceiling(resp$total / page_size)

  if (resp$total >= 1000){
    # Name the first element with required_n so the caller can see how many
    # pages there would have been.
    out <- vector("list", required_n)
    out[[n]] <- resp$businesses
    names(out)[n] <- required_n
    # glue formats string by inserting {n} with what's currently stored in object n.
    print(glue::glue("{n}th row has >= 1000 businesses."))
    # Stop before requesting more pages because we need to
    # break down Census Tract to something smaller.
    return(out)
  }

  # Now we know required_n -----------------------------------------------------
  # The list is left unnamed on purpose: the names are not needed on this
  # path and the list is handed straight to bind_rows().
  out <- vector("list", required_n)
  out[[n]] <- resp$businesses
  # Remaining pages: 2, ..., required_n (no iterations when required_n <= 1)
  for (page in seq_len(required_n)[-1]){
    out[[page]] <- fetch_page(page)$businesses
  }

  # Merge all elements in the list into a single data frame
  bind_rows(out)
}
# Try the function on the first tract before running it on all of them
yelp_first_tract <- get_yelp(ready_4_yelp[1,], "farms") %>% 
  as_tibble()
## No encoding supplied: defaulting to UTF-8.
## Warning: Outer names are only allowed for unnamed scalar atomic inputs
yelp_first_tract %>% print
## # A tibble: 0 × 0
# Prepare a collector: one slot per row of ready_4_yelp
yelp_all_list <- vector("list", nrow(ready_4_yelp))
# Looping through all Census Tracts.
# seq_len() avoids iterating over c(1, 0) when ready_4_yelp has no rows.
for (row in seq_len(nrow(ready_4_yelp))){
  yelp_all_list[[row]] <- suppressMessages(get_yelp(ready_4_yelp[row,], "farms"))
  # Progress report every 50 rows
  if (row %% 50 == 0){
    print(paste0("Current row: ", row))
  }
  # Half-second pause between tracts
  Sys.sleep(.5)
}
## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs
## [1] "Current row: 50"
## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs
#Start Test For Second Category
# Try the second category ("indpak") on the first tract
yelp_first_tract1 <- get_yelp(ready_4_yelp[1,], "indpak") %>% 
  as_tibble()
## No encoding supplied: defaulting to UTF-8.
## Warning: Outer names are only allowed for unnamed scalar atomic inputs
yelp_first_tract1 %>% print
## # A tibble: 0 × 0
# Prepare a collector: one slot per row of ready_4_yelp
yelp_all_list1 <- vector("list", nrow(ready_4_yelp))
# Looping through all Census Tracts.
# seq_len() avoids iterating over c(1, 0) when ready_4_yelp has no rows.
for (row in seq_len(nrow(ready_4_yelp))){
  yelp_all_list1[[row]] <- suppressMessages(get_yelp(ready_4_yelp[row,], "indpak"))
  # Progress report every 50 rows
  if (row %% 50 == 0){
    print(paste0("Current row: ", row))
  }
  # Half-second pause between tracts
  Sys.sleep(.5)
}
## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs
## [1] "Current row: 50"
## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs
# Put the results of both categories into one list ("indpak" first, then "farms")
yelp_all_list <- c(yelp_all_list1, yelp_all_list)
#End Test For Second Category
# Row-bind every element of the list into a single tibble
yelp_all <- as_tibble(bind_rows(yelp_all_list))

# Show the result using a wide print width
print(yelp_all, width = 1000)
## # A tibble: 283 × 16
##    id                     alias                                   
##    <chr>                  <chr>                                   
##  1 bFTPiSm6MNuAIJe9mirp4w tandoori-mela-basking-ridge             
##  2 WpUy2Y9Bva8WHfPwHMqnpg arusuvai-somerville                     
##  3 co3tnIE4STWkI4Mv51uOoA curry-tub-express-bridgewater-township  
##  4 PtmdgOoEeyb5KiIR-t_jTQ the-curry-and-grill-factory-martinsville
##  5 d9GmqiY1Q2oZm1UXa4EdWw tacowala-somerville                     
##  6 NYKMBrepIhgEZPDP4vXmRw honest-somerville                       
##  7 EHLxcSpBEupBVTh2EGlPWw punjabi-rasoi-somerset-3                
##  8 4b8rxMf3fYPS0aWfV07_eg bayleaf-grill-somerset                  
##  9 6AnY7MrBubrFd5yTSppOZg sankranthi-somerset                     
## 10 SgCOSBup7gtXvQi0HUKz2Q masala-art-branchburg                   
##    name                       
##    <chr>                      
##  1 Tandoori Mela              
##  2 Arusuvai                   
##  3 Curry Tub Express          
##  4 The Curry and Grill Factory
##  5 Tacowala                   
##  6 Honest                     
##  7 Punjabi Rasoi              
##  8 Bayleaf Grill              
##  9 Sankranthi                 
## 10 Masala Art                 
##    image_url                                                           
##    <chr>                                                               
##  1 https://s3-media3.fl.yelpcdn.com/bphoto/ScRK-Wy01DLw_u6UEBzOWg/o.jpg
##  2 https://s3-media2.fl.yelpcdn.com/bphoto/kAn0kz2k1afX-D5cu0M33Q/o.jpg
##  3 https://s3-media2.fl.yelpcdn.com/bphoto/rZTVLbb47tHCJRq6yNUegg/o.jpg
##  4 https://s3-media1.fl.yelpcdn.com/bphoto/CR3vCDrpStEC5cEQ5ecRhg/o.jpg
##  5 https://s3-media1.fl.yelpcdn.com/bphoto/zjS79zXgIxAd9horOLjAFg/o.jpg
##  6 https://s3-media4.fl.yelpcdn.com/bphoto/VA3bD3sA2LGEA5bUHQbR9Q/o.jpg
##  7 https://s3-media3.fl.yelpcdn.com/bphoto/r5TVSqhGZL9LhFyFVZGl3w/o.jpg
##  8 https://s3-media4.fl.yelpcdn.com/bphoto/ym8PMIfZ2-0JdNZ1G6E5vA/o.jpg
##  9 https://s3-media2.fl.yelpcdn.com/bphoto/TgVsPewCAML9oSwPyJ_RTg/o.jpg
## 10 https://s3-media2.fl.yelpcdn.com/bphoto/WExT67guyQM3KDW0wK4pBw/o.jpg
##    is_closed
##    <lgl>    
##  1 FALSE    
##  2 FALSE    
##  3 FALSE    
##  4 FALSE    
##  5 FALSE    
##  6 FALSE    
##  7 FALSE    
##  8 FALSE    
##  9 FALSE    
## 10 FALSE    
##    url                                                                          
##    <chr>                                                                        
##  1 https://www.yelp.com/biz/tandoori-mela-basking-ridge?adjust_creative=YsnbCt8…
##  2 https://www.yelp.com/biz/arusuvai-somerville?adjust_creative=YsnbCt8xw2zmSFX…
##  3 https://www.yelp.com/biz/curry-tub-express-bridgewater-township?adjust_creat…
##  4 https://www.yelp.com/biz/the-curry-and-grill-factory-martinsville?adjust_cre…
##  5 https://www.yelp.com/biz/tacowala-somerville?adjust_creative=YsnbCt8xw2zmSFX…
##  6 https://www.yelp.com/biz/honest-somerville?adjust_creative=YsnbCt8xw2zmSFXqf…
##  7 https://www.yelp.com/biz/punjabi-rasoi-somerset-3?adjust_creative=YsnbCt8xw2…
##  8 https://www.yelp.com/biz/bayleaf-grill-somerset?adjust_creative=YsnbCt8xw2zm…
##  9 https://www.yelp.com/biz/sankranthi-somerset?adjust_creative=YsnbCt8xw2zmSFX…
## 10 https://www.yelp.com/biz/masala-art-branchburg?adjust_creative=YsnbCt8xw2zmS…
##    review_count categories   rating coordinates$latitude $longitude transactions
##           <int> <list>        <dbl>                <dbl>      <dbl> <list>      
##  1          117 <df [1 × 2]>    4                   40.7      -74.6 <chr [2]>   
##  2          199 <df [1 × 2]>    4                   40.6      -74.6 <chr [2]>   
##  3           14 <df [1 × 2]>    4.5                 40.6      -74.6 <chr [2]>   
##  4            5 <df [3 × 2]>    4                   40.6      -74.6 <chr [2]>   
##  5           20 <df [2 × 2]>    4                   40.6      -74.6 <chr [0]>   
##  6           11 <df [1 × 2]>    4.5                 40.6      -74.6 <chr [2]>   
##  7          103 <df [1 × 2]>    4.5                 40.5      -74.5 <chr [2]>   
##  8           91 <df [3 × 2]>    4.5                 40.5      -74.5 <chr [2]>   
##  9           13 <df [1 × 2]>    3.5                 40.5      -74.5 <chr [2]>   
## 10          192 <df [1 × 2]>    4                   40.6      -74.7 <chr [2]>   
##    price location$address1         $address2 $address3      
##    <chr> <chr>                     <chr>     <chr>          
##  1 $$    562 Allen Rd              ""        ""             
##  2 $$    30 E Main St              <NA>      ""             
##  3 <NA>  400 Commons Way           ""         <NA>          
##  4 <NA>  1801 Washington Valley Rd <NA>      ""             
##  5 <NA>  133 E Main St             ""         <NA>          
##  6 <NA>  97 US Hwy 206 N           ""         <NA>          
##  7 $$    1483 Rte 27 S             ""        "Franklin Mall"
##  8 $$    2 Jfk Blvd                <NA>      ""             
##  9 <NA>  2 John F Kennedy Blvd     <NA>      ""             
## 10 $$    1049 US Hwy 202 N         ""        ""             
##    $city                $zip_code $country $state $display_address
##    <chr>                <chr>     <chr>    <chr>  <list>          
##  1 Basking Ridge        07920     US       NJ     <chr [2]>       
##  2 Somerville           08876     US       NJ     <chr [2]>       
##  3 Bridgewater Township 08807     US       NJ     <chr [2]>       
##  4 Martinsville         08836     US       NJ     <chr [2]>       
##  5 Somerville           08876     US       NJ     <chr [2]>       
##  6 Somerville           08876     US       NJ     <chr [2]>       
##  7 Somerset             08873     US       NJ     <chr [3]>       
##  8 Somerset             08873     US       NJ     <chr [2]>       
##  9 Somerset             08873     US       NJ     <chr [2]>       
## 10 Branchburg           08876     US       NJ     <chr [2]>       
##    phone          display_phone    distance
##    <chr>          <chr>               <dbl>
##  1 "+19085061032" "(908) 506-1032"    1247.
##  2 "+17322002787" "(732) 200-2787"    5527.
##  3 ""             ""                  4751.
##  4 "+19084327077" "(908) 432-7077"     180.
##  5 "+19083008690" "(908) 300-8690"    5417.
##  6 "+19089893366" "(908) 989-3366"    6266.
##  7 "+17322464569" "(732) 246-4569"    2420.
##  8 "+17327070777" "(732) 707-0777"    2842.
##  9 "+17327070777" "(732) 707-0777"    2841.
## 10 "+19085752200" "(908) 575-2200"    2908.
## # … with 273 more rows
## # ℹ Use `print(n = ...)` to see more rows
# Extract coordinates: copy the nested longitude/latitude into plain x/y
# columns, drop rows where either is missing, and convert to sf points (EPSG:4326).
yelp_sf <- yelp_all %>% 
  mutate(x = .$coordinates$longitude,
         y = .$coordinates$latitude) %>% 
  filter(!is.na(x) & !is.na(y)) %>% 
  st_as_sf(coords = c("x", "y"), crs = 4326)

# Map: dots coloured by review count (quantile classes)
tm_shape(yelp_sf) +
  tm_dots(col = "review_count", style="quantile")
# Remove duplicated businesses: keep the first row for each Yelp id,
# retaining all columns.
yelp_unique <- yelp_all %>% 
  distinct(id, .keep_all = TRUE)
# This step removes duplicate ids (not NAs), so the message says so.
glue::glue("Before dropping duplicates, there were {nrow(yelp_all)} rows. After dropping them, there are {nrow(yelp_unique)} rows") %>% 
  print()
## Before dropping duplicates, there were 283 rows. After dropping them, there are 79 rows
#This makes sense as the Map only shows 8 points!!!
concate_list <- function(x){
  # x: a data frame with columns "alias" and "title" (one entry of Yelp's
  #    `categories` column)
  # Returns the values of the "title" column joined with ", "
  str_c(x[["title"]], collapse = ", ")
}
# Flatten the de-duplicated Yelp table so every column is an atomic vector.
#   1. jsonlite::flatten() flattens the data-frame columns.
#   2. The list-columns are collapsed to one comma-separated string per row;
#      `categories` goes through the custom concate_list() helper.
yelp_flat <- mutate(
  jsonlite::flatten(yelp_unique),
  transactions = map_chr(
    transactions,
    function(v) str_c(v, collapse=", ")
  ),
  location.display_address = map_chr(
    location.display_address,
    function(v) str_c(v, collapse=", ")
  ),
  categories = map_chr(categories, concate_list)
)
# Number of missing values in each column
map_dbl(yelp_flat, function(column) sum(is.na(column)))
##                       id                    alias                     name 
##                        0                        0                        0 
##                image_url                is_closed                      url 
##                        0                        0                        0 
##             review_count               categories                   rating 
##                        0                        0                        0 
##             transactions                    price                    phone 
##                        0                       27                        0 
##            display_phone                 distance     coordinates.latitude 
##                        0                        0                        0 
##    coordinates.longitude        location.address1        location.address2 
##                        0                        0                       23 
##        location.address3            location.city        location.zip_code 
##                       22                        0                        0 
##         location.country           location.state location.display_address 
##                        0                        0                        0
# Do latitude and longitude have their NAs in exactly the same rows?
identical(
  is.na(yelp_flat[["coordinates.latitude"]]),
  is.na(yelp_flat[["coordinates.longitude"]])
)
## [1] TRUE
# Drop rows with a missing longitude
yelp_dropna1 <- drop_na(yelp_flat, coordinates.longitude)
# Drop rows with a missing price
yelp_dropna2 <- drop_na(yelp_dropna1, price)
# Keep a copy of the tract table under the name `census`
census <- tract
# Turn the cleaned table into sf points (EPSG:4326) from the coordinate columns
yelp_sf <- st_as_sf(
  yelp_dropna2,
  coords = c("coordinates.longitude", "coordinates.latitude"),
  crs = 4326
)
# sf subsets
print(st_crs(yelp_sf))
## Coordinate Reference System:
##   User input: EPSG:4326 
##   wkt:
## GEOGCRS["WGS 84",
##     DATUM["World Geodetic System 1984",
##         ELLIPSOID["WGS 84",6378137,298.257223563,
##             LENGTHUNIT["metre",1]]],
##     PRIMEM["Greenwich",0,
##         ANGLEUNIT["degree",0.0174532925199433]],
##     CS[ellipsoidal,2],
##         AXIS["geodetic latitude (Lat)",north,
##             ORDER[1],
##             ANGLEUNIT["degree",0.0174532925199433]],
##         AXIS["geodetic longitude (Lon)",east,
##             ORDER[2],
##             ANGLEUNIT["degree",0.0174532925199433]],
##     USAGE[
##         SCOPE["Horizontal component of 3D system."],
##         AREA["World."],
##         BBOX[-90,-180,90,180]],
##     ID["EPSG",4326]]
print(st_crs(tract))
## Coordinate Reference System:
##   User input: NAD83 
##   wkt:
## GEOGCRS["NAD83",
##     DATUM["North American Datum 1983",
##         ELLIPSOID["GRS 1980",6378137,298.257222101,
##             LENGTHUNIT["metre",1]]],
##     PRIMEM["Greenwich",0,
##         ANGLEUNIT["degree",0.0174532925199433]],
##     CS[ellipsoidal,2],
##         AXIS["latitude",north,
##             ORDER[1],
##             ANGLEUNIT["degree",0.0174532925199433]],
##         AXIS["longitude",east,
##             ORDER[2],
##             ANGLEUNIT["degree",0.0174532925199433]],
##     ID["EPSG",4269]]
# Put both layers in the same CRS: take the CRS of the Yelp points ...
crs_to_use <- st_crs(yelp_sf)
# ... extract its EPSG code ...
crs_epsg <- crs_to_use$epsg
# ... and re-project the tract polygons to it.
tract <- st_transform(tract, crs = crs_epsg)
# Keep only the Yelp points that intersect the union of all tract polygons
yelp_in <- yelp_sf[st_union(tract), , op = st_intersects]
# Report how the table size changed from the raw download to the final subset
print(glue::glue("nrow before: {nrow(yelp_all)} -> nrow after: {nrow(yelp_in)} \n
            ncol before: {ncol(yelp_all)} -> ncol after: {ncol(yelp_in)} \n"))
## nrow before: 283 -> nrow after: 24 
## 
## ncol before: 16 -> ncol after: 23
print(colnames(yelp_in))
##  [1] "id"                       "alias"                   
##  [3] "name"                     "image_url"               
##  [5] "is_closed"                "url"                     
##  [7] "review_count"             "categories"              
##  [9] "rating"                   "transactions"            
## [11] "price"                    "phone"                   
## [13] "display_phone"            "distance"                
## [15] "location.address1"        "location.address2"       
## [17] "location.address3"        "location.city"           
## [19] "location.zip_code"        "location.country"        
## [21] "location.state"           "location.display_address"
## [23] "geometry"
# Interactive map of the retained businesses, dots coloured by price
tmap_mode(mode = "view")
## tmap mode set to interactive viewing
tm_shape(shp = yelp_in) + tm_dots(col = "price")