R Markdown

This is an R Markdown document for Mini 3!

# Credentials -------------------------------------------------------------------
# API keys are read from environment variables (e.g. set in ~/.Renviron) so that
# no secret is stored in, or printed by, this document. Any key that was ever
# written into this file should be treated as compromised and rotated.
client_secret <- Sys.getenv("YELP_API_KEY")
if (!nzchar(client_secret)) {
  warning("YELP_API_KEY is not set; Yelp requests will fail.", call. = FALSE)
}

# Only check that the Census key is present; do not echo its value into the
# rendered output.
if (!nzchar(Sys.getenv("CENSUS_API_KEY"))) {
  warning("CENSUS_API_KEY is not set; Census requests may fail.", call. = FALSE)
}
#state_pop <- get_decennial(geography = "state", variables = "P001001")
#head(state_pop)
# Download tract-level 2019 ACS 5-year estimates, with geometry, for the
# "Fulton" and "Dekalb" counties of GA. Each named entry in `variables` becomes
# a column prefix in the wide output (the estimate columns carry an "E" suffix).
tract <- suppressMessages(
  get_acs(
    geography = "tract", # or "block group", "county", "state" etc.
    state = "GA",
    county = c("Fulton", "Dekalb"),
    variables = c(
      hhincome = "B19019_001",
      race.tot = "B02001_001",
      race.white = "B02001_002",
      race.black = "B02001_003",
      trans.total = "B08006_001",
      trans.car = "B08006_002",
      trans.drovealone = "B08006_003",
      # 005-007 (2-person / 4-person carpool etc.) are deliberately skipped
      trans.carpooled = "B08006_004",
      # No detail on the type of public transport (009-013) is downloaded
      trans.pubtrans = "B08006_008",
      trans.bicycle = "B08006_014",
      trans.walk = "B08006_015",
      trans.WfH = "B08006_017",
      # NOTE(review): B25104_001 looks like a table total rather than a median;
      # confirm whether B25105_001 (median monthly housing costs) was intended.
      med_housexp = "B25104_001",
      med_realestate_taxes = "B25103_001"
    ),
    year = 2019,
    survey = "acs5",  # American Community Survey 5-year estimate
    geometry = TRUE,  # returns sf objects
    output = "wide"   # wide vs. long
  )
)
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |=                                                                     |   1%
  |                                                                            
  |=                                                                     |   2%
  |                                                                            
  |==                                                                    |   3%
  |                                                                            
  |====                                                                  |   6%
  |                                                                            
  |=====                                                                 |   7%
  |                                                                            
  |======                                                                |   9%
  |                                                                            
  |=======                                                               |  11%
  |                                                                            
  |=========                                                             |  13%
  |                                                                            
  |==========                                                            |  14%
  |                                                                            
  |============                                                          |  16%
  |                                                                            
  |============                                                          |  18%
  |                                                                            
  |==============                                                        |  20%
  |                                                                            
  |===============                                                       |  21%
  |                                                                            
  |=================                                                     |  24%
  |                                                                            
  |=================                                                     |  25%
  |                                                                            
  |===================                                                   |  27%
  |                                                                            
  |====================                                                  |  29%
  |                                                                            
  |======================                                                |  31%
  |                                                                            
  |=======================                                               |  32%
  |                                                                            
  |========================                                              |  34%
  |                                                                            
  |=========================                                             |  36%
  |                                                                            
  |===========================                                           |  38%
  |                                                                            
  |============================                                          |  39%
  |                                                                            
  |=============================                                         |  42%
  |                                                                            
  |==============================                                        |  43%
  |                                                                            
  |================================                                      |  45%
  |                                                                            
  |=================================                                     |  47%
  |                                                                            
  |==================================                                    |  49%
  |                                                                            
  |===================================                                   |  50%
  |                                                                            
  |=====================================                                 |  52%
  |                                                                            
  |======================================                                |  54%
  |                                                                            
  |=======================================                               |  56%
  |                                                                            
  |========================================                              |  57%
  |                                                                            
  |==========================================                            |  60%
  |                                                                            
  |===========================================                           |  61%
  |                                                                            
  |============================================                          |  63%
  |                                                                            
  |=============================================                         |  64%
  |                                                                            
  |===============================================                       |  67%
  |                                                                            
  |================================================                      |  68%
  |                                                                            
  |=================================================                     |  70%
  |                                                                            
  |==================================================                    |  72%
  |                                                                            
  |====================================================                  |  74%
  |                                                                            
  |=====================================================                 |  75%
  |                                                                            
  |======================================================                |  77%
  |                                                                            
  |=======================================================               |  79%
  |                                                                            
  |=========================================================             |  81%
  |                                                                            
  |==========================================================            |  82%
  |                                                                            
  |===========================================================           |  85%
  |                                                                            
  |============================================================          |  86%
  |                                                                            
  |==============================================================        |  88%
  |                                                                            
  |===============================================================       |  90%
  |                                                                            
  |================================================================      |  92%
  |                                                                            
  |=================================================================     |  93%
  |                                                                            
  |===================================================================   |  95%
  |                                                                            
  |====================================================================  |  97%
  |                                                                            
  |===================================================================== |  99%
  |                                                                            
  |======================================================================| 100%
# Report the size of the downloaded table
message("nrow: ", nrow(tract), ", ncol: ", ncol(tract))
## nrow: 349, ncol: 31
#print(tract)
# Keep the identifier, the tract name and the estimate ("E") columns only,
# renaming each one as new name = old name.
tract <- select(
  tract,
  GEOID,
  county = NAME,
  hhincome = hhincomeE,
  race.tot = race.totE,
  race.white = race.whiteE,
  race.black = race.blackE,
  trans.total = trans.totalE,
  trans.car = trans.carE,
  trans.drovealone = trans.drovealoneE,
  trans.carpooled = trans.carpooledE,
  trans.pubtrans = trans.pubtransE,
  trans.bicycle = trans.bicycleE,
  trans.walk = trans.walkE,
  trans.WfH = trans.WfHE,
  Med_HHExp = med_housexpE,
  med_RETaxes = med_realestate_taxesE
)
# Interactive choropleths of the two housing-cost variables, side by side
tmap_mode("view")
## tmap mode set to interactive viewing
## tmap mode set to interactive viewing
RE_Taxes <- tract %>% tm_shape() + tm_polygons("med_RETaxes")
HH_exp <- tract %>% tm_shape() + tm_polygons("Med_HHExp")
tmap_arrange(RE_Taxes, HH_exp)
# End of the housing-cost maps
#print(tract)
# Outline of every downloaded tract
tmap_mode("view")
## tmap mode set to interactive viewing
tract %>% tm_shape() + tm_borders()

Draw Tracts

# Function: Get tract-wise radius
get_r <- function(poly, epsg_id, scale = 1.2) {
  #---------------------
  # Takes:   poly    - a single POLYGON or LINESTRING
  #          epsg_id - CRS given to the points built from poly's bounding box
  #          scale   - multiplier applied to the radius (default 1.2, the value
  #                    that used to be hard-coded)
  # Outputs: an sf object holding the centre point of the bounding box, with a
  #          `radius` column = scale * distance from that centre to a corner
  #---------------------
  stopifnot(is.numeric(scale), length(scale) == 1L)

  # Bounding box of the given geometry
  bb <- st_bbox(poly)
  # One corner of the bounding box, built from its first two values
  bb_corner <- st_point(c(bb[1], bb[2])) %>% st_sfc(crs = epsg_id)
  # Centre of the bounding box: midpoint of each pair of opposite edges
  bb_center_x <- (bb[3] + bb[1]) / 2
  bb_center_y <- (bb[4] + bb[2]) / 2
  bb_center <- st_point(c(bb_center_x, bb_center_y)) %>%
    st_sfc(crs = epsg_id) %>%
    st_sf()

  # Distance between the corner and the centre
  r <- st_distance(bb_corner, bb_center)
  # Enlarge the radius so the circle is a bit larger than the Census Tract.
  # See the Yelp explanation of their radius parameter to see why we do this.
  bb_center$radius <- r * scale
  bb_center
}
## Using a loop -----------------------------------------------------------------
epsg_id <- 4326 # Edit (9/8/2022): 4326 measures distance in meter. Before the edit, I used 26967.

# Transform the layer once, outside the loop; re-projecting every tract on each
# iteration repeats the same work nrow(tract) times.
tract_geom_4326 <- tract %>%
  st_transform(crs = epsg_id) %>%
  st_geometry()

# Pre-allocated list (one NULL slot per tract) that the loop fills in
r4all_loop <- vector("list", length(tract_geom_4326))

# seq_along() yields an empty sequence when there are no tracts, unlike 1:n
for (i in seq_along(tract_geom_4326)) {
  r4all_loop[[i]] <- get_r(tract_geom_4326[[i]], epsg_id = epsg_id)
}

# Stack the per-tract results into one object
r4all_loop <- bind_rows(r4all_loop)


# Using a functional -----------------------------------------------------------
# lapply() calls get_r() once per tract geometry, forwarding epsg_id to it, and
# the per-tract results are then stacked into one object.
r4all_apply <- tract %>%
  st_geometry() %>%
  st_transform(crs = epsg_id) %>%
  lapply(get_r, epsg_id = epsg_id) %>%
  bind_rows()

# Are these two identical?
identical(r4all_apply, r4all_loop)
## [1] TRUE
# Add the point coordinates as plain columns: x = first coordinate column,
# y = second coordinate column.
ready_4_yelp <- mutate(
  r4all_apply,
  x = st_coordinates(r4all_apply)[, 1],
  y = st_coordinates(r4all_apply)[, 2]
)
tmap_mode("view")
## tmap mode set to interactive viewing
# Visual check on the first 10 rows: buffers drawn around each centre point
# with the computed radius (red), overlaid with the original tract
# outlines (blue).
ready_4_yelp[1:10, ] %>%
  st_buffer(dist = .$radius) %>%
  tm_shape() +
  tm_polygons(alpha = 0.5, col = "red") +
  tm_shape(tract[1:10, ]) +
  tm_borders(col = "blue")
# Try the Yelp search on a single tract before wrapping it in a function
which_tract <- 1
# Query 1: bike rentals around the centre of the chosen tract
test <- business_search(
  api_key = client_secret,  # key stored beforehand, like the census one
  categories = "bikerentals",
  latitude = ready_4_yelp$y[which_tract],
  longitude = ready_4_yelp$x[which_tract],
  offset = 0,  # 1st page, 1st obs
  radius = round(ready_4_yelp$radius[which_tract]),  # radius requires integer value
  limit = 50   # how many business per page
)
## No encoding supplied: defaulting to UTF-8.
# Query 2: same location and radius, restaurants category
test1 <- business_search(
  api_key = client_secret,
  categories = "restaurants",
  latitude = ready_4_yelp$y[which_tract],
  longitude = ready_4_yelp$x[which_tract],
  offset = 0,
  radius = round(ready_4_yelp$radius[which_tract]),
  limit = 50
)
## No encoding supplied: defaulting to UTF-8.
# Peek at each element of the bike-rental response
lapply(test, head)
## $businesses
## list()
## 
## $total
## [1] 0
## 
## $region
## $region$center
## $region$center$longitude
## [1] -84.38286
## 
## $region$center$latitude
## [1] 33.78421
names(test)
## [1] "businesses" "total"      "region"
# With zero hits `businesses` is an empty list, not a data frame, so the
# row and column counts below come out empty.
paste0(
  "is it a data.frame?: ", is.data.frame(test$businesses), ", ",
  " how many rows?: ", nrow(test$businesses), ", ",
  " how many columns?: ", ncol(test$businesses)
)
## [1] "is it a data.frame?: FALSE,  how many rows?: ,  how many columns?: "
# FUNCTION
get_yelp <- function(tract, category, api_key = client_secret) {
  # ----------------------------------
  # Takes:   tract    - one row of tract information with x (used as longitude),
  #                     y (used as latitude) and radius
  #          category - category name (str)
  #          api_key  - Yelp API key; defaults to the global `client_secret`
  # Outputs: all result pages merged by bind_rows(). If the search reports
  #          >= 1000 businesses, only the first page is fetched and a list is
  #          returned whose first element is named with the number of pages
  #          that would have been required.
  # Errors:  stops when the response carries no `total` field.
  # ----------------------------------

  # One request for the given page number (page 1 -> offset 0)
  fetch_page <- function(page) {
    business_search(api_key = api_key,
                    categories = category,
                    latitude = tract$y,
                    longitude = tract$x,
                    offset = (page - 1) * 50,
                    radius = round(tract$radius),
                    limit = 50)
  }

  n <- 1
  # First request --------------------------------------------------------------
  resp <- fetch_page(n)
  # Without `total` the page count cannot be computed; fail with a clear
  # message instead of the cryptic "invalid 'length' argument" from vector().
  if (is.null(resp$total)) {
    stop("Yelp response has no `total` field; the request probably failed ",
         "(check the API key, rate limit and radius).", call. = FALSE)
  }

  # Calculate how many requests are needed in total
  required_n <- ceiling(resp$total / 50)
  # out is where the results will be appended to.
  out <- vector("list", required_n)
  # Store the business information to nth slot in out
  out[[n]] <- resp$businesses

  if (resp$total >= 1000) {
    # Record the required page count in the element name so the caller can
    # see how large the result set is. Naming happens only on this path:
    # names on the list handed to bind_rows() are what triggered the
    # "Outer names are only allowed for unnamed scalar atomic inputs" warning.
    names(out)[n] <- required_n
    # glue formats string by inserting {n} with what's currently stored in object n.
    print(glue::glue("{n}th row has >= 1000 businesses."))
    # Stop before going into the loop because we need to
    # break down Census Tract to something smaller.
    return(out)
  }

  # Now we know required_n -----------------------------------------------------
  n <- n + 1
  while (n <= required_n) {
    out[[n]] <- fetch_page(n)$businesses
    n <- n + 1
  }

  # Merge all elements in the list into a single data frame
  bind_rows(out)
}
# Try the function on the first tract only
yelp_first_tract <- get_yelp(ready_4_yelp[1, ], "bikerentals") %>%
  as_tibble()
## No encoding supplied: defaulting to UTF-8.
## Warning: Outer names are only allowed for unnamed scalar atomic inputs
print(yelp_first_tract)
## # A tibble: 0 × 0
# Prepare a collector: one slot per tract
yelp_all_list <- vector("list", nrow(ready_4_yelp))
# Looping through all Census Tracts.
# seq_len() gives an empty sequence for zero rows, whereas 1:0 would run twice.
for (row in seq_len(nrow(ready_4_yelp))) {
  yelp_all_list[[row]] <- suppressMessages(
    get_yelp(ready_4_yelp[row, ], "bikerentals")
  )
  # Progress report every 50 tracts
  if (row %% 50 == 0) {
    print(paste0("Current row: ", row))
  }
  # Half-second pause between tracts (presumably to go easy on the Yelp API)
  Sys.sleep(.5)
}
## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs
## [1] "Current row: 50"
## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs
## [1] "Current row: 100"
## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs
## [1] "Current row: 150"
## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs
## [1] "Current row: 200"
## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs
## [1] "Current row: 250"
## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs
## [1] "Current row: 300"
## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs

## Warning: Outer names are only allowed for unnamed scalar atomic inputs
#Start Test For Second Category
#yelp_first_tract1 <- get_yelp(ready_4_yelp[1,], "indpak") %>% 
#  as_tibble()
#yelp_first_tract1 %>% print
# Prepare a collector
#yelp_all_list1 <- vector("list", nrow(ready_4_yelp))
# Looping through all Census Tracts
#for (row in 1:nrow(ready_4_yelp)){
#  yelp_all_list1[[row]] <- #suppressMessages(get_yelp(ready_4_yelp[row,], "indpak"))
#  if (row %% 50 == 0){
#    print(paste0("Current row: ", row))
#  }
#  Sys.sleep(.5)
#}
#yelp_all_list <- c(yelp_all_list1,yelp_all_list)
#End Test For Second Category
# Row-bind every element of yelp_all_list into one table, stored as a tibble
yelp_all <- as_tibble(bind_rows(yelp_all_list))

# Show the combined table; the large width lets all columns be printed
print(yelp_all, width = 1000)
## # A tibble: 208 × 16
##    id                     alias                                      
##    <chr>                  <chr>                                      
##  1 FK7-M9BGyCgpEmVifcPfoA aztec-cycles-stone-mountain                
##  2 FK7-M9BGyCgpEmVifcPfoA aztec-cycles-stone-mountain                
##  3 JkkHRgYj0mvdgbMXFm436w civil-bikes-atlanta                        
##  4 FK7-M9BGyCgpEmVifcPfoA aztec-cycles-stone-mountain                
##  5 UmftRC3h0h_owHEm5ZLp7Q jump-atlanta-2                             
##  6 FK7-M9BGyCgpEmVifcPfoA aztec-cycles-stone-mountain                
##  7 JkkHRgYj0mvdgbMXFm436w civil-bikes-atlanta                        
##  8 kJJiJqGbiO_QXmdhol3hIQ british-and-american-bikes-atlanta         
##  9 FK7-M9BGyCgpEmVifcPfoA aztec-cycles-stone-mountain                
## 10 ot4UyUsRAlTFudTlCVRMrQ pedego-electric-bikes-alpharetta-alpharetta
##    name                            
##    <chr>                           
##  1 Aztec Cycles                    
##  2 Aztec Cycles                    
##  3 Civil Bikes                     
##  4 Aztec Cycles                    
##  5 JUMP                            
##  6 Aztec Cycles                    
##  7 Civil Bikes                     
##  8 British and American Bikes      
##  9 Aztec Cycles                    
## 10 Pedego Electric Bikes Alpharetta
##    image_url                                                           
##    <chr>                                                               
##  1 https://s3-media3.fl.yelpcdn.com/bphoto/re-aoEuun-QS1SE7dQCySQ/o.jpg
##  2 https://s3-media3.fl.yelpcdn.com/bphoto/re-aoEuun-QS1SE7dQCySQ/o.jpg
##  3 https://s3-media4.fl.yelpcdn.com/bphoto/JqTLT-chrqtbyuoB-52gdw/o.jpg
##  4 https://s3-media3.fl.yelpcdn.com/bphoto/re-aoEuun-QS1SE7dQCySQ/o.jpg
##  5 https://s3-media2.fl.yelpcdn.com/bphoto/D87H00XdLWZJS-LvQkTalA/o.jpg
##  6 https://s3-media3.fl.yelpcdn.com/bphoto/re-aoEuun-QS1SE7dQCySQ/o.jpg
##  7 https://s3-media4.fl.yelpcdn.com/bphoto/JqTLT-chrqtbyuoB-52gdw/o.jpg
##  8 https://s3-media2.fl.yelpcdn.com/bphoto/9I0G3Ge2yKFg6184t1XYEQ/o.jpg
##  9 https://s3-media3.fl.yelpcdn.com/bphoto/re-aoEuun-QS1SE7dQCySQ/o.jpg
## 10 https://s3-media2.fl.yelpcdn.com/bphoto/gPNidtknlSWKu3Bh4iOJ3Q/o.jpg
##    is_closed
##    <lgl>    
##  1 FALSE    
##  2 FALSE    
##  3 FALSE    
##  4 FALSE    
##  5 FALSE    
##  6 FALSE    
##  7 FALSE    
##  8 FALSE    
##  9 FALSE    
## 10 FALSE    
##    url                                                                          
##    <chr>                                                                        
##  1 https://www.yelp.com/biz/aztec-cycles-stone-mountain?adjust_creative=2LH0tJl…
##  2 https://www.yelp.com/biz/aztec-cycles-stone-mountain?adjust_creative=2LH0tJl…
##  3 https://www.yelp.com/biz/civil-bikes-atlanta?adjust_creative=2LH0tJl-lVjcpTq…
##  4 https://www.yelp.com/biz/aztec-cycles-stone-mountain?adjust_creative=2LH0tJl…
##  5 https://www.yelp.com/biz/jump-atlanta-2?adjust_creative=2LH0tJl-lVjcpTqQqmZP…
##  6 https://www.yelp.com/biz/aztec-cycles-stone-mountain?adjust_creative=2LH0tJl…
##  7 https://www.yelp.com/biz/civil-bikes-atlanta?adjust_creative=2LH0tJl-lVjcpTq…
##  8 https://www.yelp.com/biz/british-and-american-bikes-atlanta?adjust_creative=…
##  9 https://www.yelp.com/biz/aztec-cycles-stone-mountain?adjust_creative=2LH0tJl…
## 10 https://www.yelp.com/biz/pedego-electric-bikes-alpharetta-alpharetta?adjust_…
##    review_count categories   rating coordinates$latitude $longitude transactions
##           <int> <list>        <dbl>                <dbl>      <dbl> <list>      
##  1           54 <df [3 × 2]>    5                   33.8      -84.2 <list [0]>  
##  2           54 <df [3 × 2]>    5                   33.8      -84.2 <list [0]>  
##  3            9 <df [2 × 2]>    4.5                 33.7      -84.4 <list [0]>  
##  4           54 <df [3 × 2]>    5                   33.8      -84.2 <list [0]>  
##  5            1 <df [2 × 2]>    1                   33.7      -84.4 <list [0]>  
##  6           54 <df [3 × 2]>    5                   33.8      -84.2 <list [0]>  
##  7            9 <df [2 × 2]>    4.5                 33.7      -84.4 <list [0]>  
##  8            2 <df [2 × 2]>    5                   33.9      -84.3 <list [0]>  
##  9           54 <df [3 × 2]>    5                   33.8      -84.2 <list [0]>  
## 10            7 <df [3 × 2]>    5                   34.0      -84.3 <list [0]>  
##    price location$address1          $address2   $address3 $city         
##    <chr> <chr>                      <chr>       <chr>     <chr>         
##  1 $$    "901 Main St"              ""          ""        Stone Mountain
##  2 $$    "901 Main St"              ""          ""        Stone Mountain
##  3 <NA>  ""                          <NA>       ""        Atlanta       
##  4 $$    "901 Main St"              ""          ""        Stone Mountain
##  5 <NA>  ""                         ""          <NA>      Atlanta       
##  6 $$    "901 Main St"              ""          ""        Stone Mountain
##  7 <NA>  ""                          <NA>       ""        Atlanta       
##  8 <NA>  "4264 F Winters Chapel Rd" ""          ""        Atlanta       
##  9 $$    "901 Main St"              ""          ""        Stone Mountain
## 10 <NA>  "6480 N Point Pkwy"        "Ste 1100b" <NA>      Alpharetta    
##    $zip_code $country $state $display_address phone        display_phone 
##    <chr>     <chr>    <chr>  <list>           <chr>        <chr>         
##  1 30083     US       GA     <chr [2]>        +16786369043 (678) 636-9043
##  2 30083     US       GA     <chr [2]>        +16786369043 (678) 636-9043
##  3 30312     US       GA     <chr [1]>        +14043238754 (404) 323-8754
##  4 30083     US       GA     <chr [2]>        +16786369043 (678) 636-9043
##  5 30301     US       GA     <chr [1]>        +18333006106 (833) 300-6106
##  6 30083     US       GA     <chr [2]>        +16786369043 (678) 636-9043
##  7 30312     US       GA     <chr [1]>        +14043238754 (404) 323-8754
##  8 30360     US       GA     <chr [2]>        +17704518868 (770) 451-8868
##  9 30083     US       GA     <chr [2]>        +16786369043 (678) 636-9043
## 10 30022     US       GA     <chr [3]>        +14042810264 (404) 281-0264
##    distance
##       <dbl>
##  1   22818.
##  2   29547.
##  3     366.
##  4   20724.
##  5    1238.
##  6   20887.
##  7     527.
##  8    1715.
##  9    1921.
## 10    2501.
## # … with 198 more rows
## # ℹ Use `print(n = ...)` to see more rows
# Copy longitude/latitude out of the nested `coordinates` column, discard rows
# where either one is missing, then build point geometries in EPSG:4326.
yelp_sf <- yelp_all %>%
  mutate(
    x = coordinates$longitude,
    y = coordinates$latitude
  ) %>%
  filter(!(is.na(x) | is.na(y))) %>%
  st_as_sf(coords = c("x", "y"), crs = 4326)

# Dot map of the businesses, coloured by review_count in quantile classes
tm_shape(yelp_sf) + tm_dots(col = "review_count", style = "quantile")
# Keep a single row per business id. The printed yelp_all above repeats the
# same id on several rows, so this step removes duplicates (it does not drop
# missing values).
yelp_unique <- yelp_all %>%
  distinct(id, .keep_all = TRUE)
glue::glue("Before dropping duplicates, there were {nrow(yelp_all)} rows. After dropping them, there are {nrow(yelp_unique)} rows") %>%
  print()
## Before dropping duplicates, there were 208 rows. After dropping them, there are 16 rows
#This makes sense as the Map only shows 8 points!!!
# Custom helper for the "categories" column of the Yelp data.
#
# x: one element of that column, i.e. a data frame with a "title" column
#    (the Yelp categories data frames carry "alias" and "title").
# Returns the titles of x joined into one string, separated by ", ".
concate_list <- function(x){
  str_c(x[["title"]], collapse = ", ")
}
# Issue 3 ------------------------------
# Build a flat table: no nested data-frame columns and no list-columns.
yelp_flat <- yelp_unique %>%
  # Step 1: expand data-frame columns into "parent.child" columns
  # (e.g. coordinates.latitude, location.city)
  jsonlite::flatten() %>%
  # Step 2: turn every list-column element into one comma-separated string
  mutate(
    transactions = map_chr(transactions, str_c, collapse = ", "),
    location.display_address = map_chr(
      location.display_address, str_c, collapse = ", "
    ),
    # concate_list is the custom helper defined for the categories column
    categories = map_chr(categories, concate_list)
  )
# Count the missing values in each column of the flattened table
map_dbl(yelp_flat, function(col) sum(is.na(col)))
##                       id                    alias                     name 
##                        0                        0                        0 
##                image_url                is_closed                      url 
##                        0                        0                        0 
##             review_count               categories                   rating 
##                        0                        0                        0 
##             transactions                    price                    phone 
##                        0                        9                        0 
##            display_phone                 distance     coordinates.latitude 
##                        0                        0                        0 
##    coordinates.longitude        location.address1        location.address2 
##                        0                        1                        5 
##        location.address3            location.city        location.zip_code 
##                        7                        0                        0 
##         location.country           location.state location.display_address 
##                        0                        0                        0
# Check that latitude and longitude are missing on exactly the same rows
identical(
  is.na(yelp_flat[["coordinates.latitude"]]),
  is.na(yelp_flat[["coordinates.longitude"]])
)
## [1] TRUE
# Remove businesses with a missing longitude
yelp_dropna1 <- drop_na(yelp_flat, coordinates.longitude)
# Remove businesses with a missing price
yelp_dropna2 <- drop_na(yelp_dropna1, price)
# Keep a copy of the tract layer under the name `census`
census <- tract
# Rebuild the point layer from the cleaned table (EPSG:4326 lon/lat)
yelp_sf <- st_as_sf(
  yelp_dropna2,
  coords = c("coordinates.longitude", "coordinates.latitude"),
  crs = 4326
)
# Show the coordinate reference system of the point layer
print(st_crs(yelp_sf))
## Coordinate Reference System:
##   User input: EPSG:4326 
##   wkt:
## GEOGCRS["WGS 84",
##     DATUM["World Geodetic System 1984",
##         ELLIPSOID["WGS 84",6378137,298.257223563,
##             LENGTHUNIT["metre",1]]],
##     PRIMEM["Greenwich",0,
##         ANGLEUNIT["degree",0.0174532925199433]],
##     CS[ellipsoidal,2],
##         AXIS["geodetic latitude (Lat)",north,
##             ORDER[1],
##             ANGLEUNIT["degree",0.0174532925199433]],
##         AXIS["geodetic longitude (Lon)",east,
##             ORDER[2],
##             ANGLEUNIT["degree",0.0174532925199433]],
##     USAGE[
##         SCOPE["Horizontal component of 3D system."],
##         AREA["World."],
##         BBOX[-90,-180,90,180]],
##     ID["EPSG",4326]]
# Show the coordinate reference system of the tract layer for comparison
tract %>% st_crs() %>% print()
## Coordinate Reference System:
##   User input: NAD83 
##   wkt:
## GEOGCRS["NAD83",
##     DATUM["North American Datum 1983",
##         ELLIPSOID["GRS 1980",6378137,298.257222101,
##             LENGTHUNIT["metre",1]]],
##     PRIMEM["Greenwich",0,
##         ANGLEUNIT["degree",0.0174532925199433]],
##     CS[ellipsoidal,2],
##         AXIS["latitude",north,
##             ORDER[1],
##             ANGLEUNIT["degree",0.0174532925199433]],
##         AXIS["longitude",east,
##             ORDER[2],
##             ANGLEUNIT["degree",0.0174532925199433]],
##     ID["EPSG",4269]]
# Take the CRS of the Yelp point layer (yelp_sf) and its EPSG code
crs_to_use <- st_crs(yelp_sf)
crs_epsg <- crs_to_use$epsg
# Reproject the tracts to that CRS so both layers use the same one
tract <- st_transform(tract, crs = crs_epsg)
# Keep only the points that intersect the union of all tract polygons
yelp_in <- yelp_sf[st_union(tract), , op = st_intersects]
# Compare the size of the raw results (yelp_all) with the filtered points
print(glue::glue("nrow before: {nrow(yelp_all)} -> nrow after: {nrow(yelp_in)} \n
            ncol before: {ncol(yelp_all)} -> ncol after: {ncol(yelp_in)} \n"))
## nrow before: 208 -> nrow after: 6 
## 
## ncol before: 16 -> ncol after: 23
# List the columns of the filtered point layer
print(names(yelp_in))
##  [1] "id"                       "alias"                   
##  [3] "name"                     "image_url"               
##  [5] "is_closed"                "url"                     
##  [7] "review_count"             "categories"              
##  [9] "rating"                   "transactions"            
## [11] "price"                    "phone"                   
## [13] "display_phone"            "distance"                
## [15] "location.address1"        "location.address2"       
## [17] "location.address3"        "location.city"           
## [19] "location.zip_code"        "location.country"        
## [21] "location.state"           "location.display_address"
## [23] "geometry"
# Switch tmap to its interactive "view" mode
tmap_mode(mode = "view")
## tmap mode set to interactive viewing
# Dot map of the businesses inside the tracts, coloured by price
tm_shape(yelp_in) +
  tm_dots(col = "price")
# First six rows of the reprojected tract layer
head(tract, n = 6L)
## Simple feature collection with 6 features and 16 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -84.41692 ymin: 33.72043 xmax: -84.33426 ymax: 33.97001
## Geodetic CRS:  WGS 84
##         GEOID                                      county hhincome race.tot
## 1 13121001100     Census Tract 11, Fulton County, Georgia   109426     5193
## 2 13121009603  Census Tract 96.03, Fulton County, Georgia    83243     4830
## 3 13121005800     Census Tract 58, Fulton County, Georgia    42679     1479
## 4 13121010117 Census Tract 101.17, Fulton County, Georgia    60731     4319
## 5 13121009502  Census Tract 95.02, Fulton County, Georgia    86053     3867
## 6 13089021213 Census Tract 212.13, DeKalb County, Georgia   154063     3526
##   race.white race.black trans.total trans.car trans.drovealone trans.carpooled
## 1       4060        299        3972      2482             2350             132
## 2       3561        749        3293      2786             2742              44
## 3        300       1057         736       398              341              57
## 4       1701       2007        2941      2346             2217             129
## 5       2627        796        2087      1774             1593             181
## 6       3196         67        1365      1098              981             117
##   trans.pubtrans trans.bicycle trans.walk trans.WfH Med_HHExp med_RETaxes
## 1            276            71        632       455      3432        3699
## 2             63             0         22       303      3071        4869
## 3            271            12         43        12       576         493
## 4            178            24         15       282      1962        1330
## 5             17             0         62       161      2254        1993
## 6             36             0          0       213      1205        5987
##                         geometry
## 1 MULTIPOLYGON (((-84.38782 3...
## 2 MULTIPOLYGON (((-84.38738 3...
## 3 MULTIPOLYGON (((-84.41692 3...
## 4 MULTIPOLYGON (((-84.36575 3...
## 5 MULTIPOLYGON (((-84.39472 3...
## 6 MULTIPOLYGON (((-84.34783 3...
# First six rows of the filtered point layer
head(yelp_in, n = 6L)
## Simple feature collection with 6 features and 22 fields
## Geometry type: POINT
## Dimension:     XY
## Bounding box:  xmin: -84.36515 ymin: 33.75833 xmax: -84.16982 ymax: 34.05956
## Geodetic CRS:  WGS 84
##                       id                        alias                 name
## 1 FK7-M9BGyCgpEmVifcPfoA  aztec-cycles-stone-mountain         Aztec Cycles
## 2 LsNS77QoD4wauKgGeCfxPQ     dads-emissions-decatur-4      Dad's Emissions
## 3 b3nacMG8PR77GNCaI4RBKA atlanta-bicycle-barn-atlanta Atlanta Bicycle Barn
## 4 BozJwfoXvoDEUj-sgr7WDg    podium-multisport-atlanta    Podium Multisport
## 5 OJVvH1CUZacuHjSLkhSKOg     roswell-bicycles-roswell     Roswell Bicycles
## 6 cRTM5f8ATvVr9lrKcOnWgg    the-sport-factory-roswell    The Sport Factory
##                                                              image_url
## 1 https://s3-media3.fl.yelpcdn.com/bphoto/re-aoEuun-QS1SE7dQCySQ/o.jpg
## 2 https://s3-media3.fl.yelpcdn.com/bphoto/_W7WHceEBLyPI86R2-3-Vw/o.jpg
## 3 https://s3-media3.fl.yelpcdn.com/bphoto/Ik2pMce41_MRcg3svjTbSQ/o.jpg
## 4 https://s3-media1.fl.yelpcdn.com/bphoto/T78pUkBulcQ5osv4mVGTiQ/o.jpg
## 5 https://s3-media2.fl.yelpcdn.com/bphoto/1YUjKn4QEkVQG_eIH7wiXQ/o.jpg
## 6 https://s3-media2.fl.yelpcdn.com/bphoto/KbtggaK_z06YtHcweq5beg/o.jpg
##   is_closed
## 1     FALSE
## 2     FALSE
## 3     FALSE
## 4     FALSE
## 5     FALSE
## 6     FALSE
##                                                                                                                                                                                         url
## 1  https://www.yelp.com/biz/aztec-cycles-stone-mountain?adjust_creative=2LH0tJl-lVjcpTqQqmZPWQ&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=2LH0tJl-lVjcpTqQqmZPWQ
## 2     https://www.yelp.com/biz/dads-emissions-decatur-4?adjust_creative=2LH0tJl-lVjcpTqQqmZPWQ&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=2LH0tJl-lVjcpTqQqmZPWQ
## 3 https://www.yelp.com/biz/atlanta-bicycle-barn-atlanta?adjust_creative=2LH0tJl-lVjcpTqQqmZPWQ&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=2LH0tJl-lVjcpTqQqmZPWQ
## 4    https://www.yelp.com/biz/podium-multisport-atlanta?adjust_creative=2LH0tJl-lVjcpTqQqmZPWQ&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=2LH0tJl-lVjcpTqQqmZPWQ
## 5     https://www.yelp.com/biz/roswell-bicycles-roswell?adjust_creative=2LH0tJl-lVjcpTqQqmZPWQ&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=2LH0tJl-lVjcpTqQqmZPWQ
## 6    https://www.yelp.com/biz/the-sport-factory-roswell?adjust_creative=2LH0tJl-lVjcpTqQqmZPWQ&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=2LH0tJl-lVjcpTqQqmZPWQ
##   review_count                                   categories rating transactions
## 1           54 Bikes, Bike Repair/Maintenance, Bike Rentals    5.0             
## 2           64     Smog Check Stations, Bikes, Bike Rentals    4.5             
## 3          124 Bike Rentals, Bike Repair/Maintenance, Bikes    4.5             
## 4           19                          Bikes, Bike Rentals    4.5             
## 5           70                          Bikes, Bike Rentals    4.0             
## 6            1 Bike Repair/Maintenance, Bikes, Bike Rentals    1.0             
##   price        phone  display_phone  distance   location.address1
## 1    $$ +16786369043 (678) 636-9043 22817.680         901 Main St
## 2     $ +14042946644 (404) 294-6644  1427.664      1707 Church St
## 3    $$ +17708732413 (770) 873-2413  1191.788   151 Sampson St NE
## 4   $$$ +14048923400 (404) 892-3400  2221.750 1167 Zonolite Pl NE
## 5   $$$ +17706424057 (770) 642-4057  2841.926       670 Houze Way
## 6  $$$$ +16783889835 (678) 388-9835  2253.020      720 Hembree Pl
##   location.address2 location.address3  location.city location.zip_code
## 1                                     Stone Mountain             30083
## 2              <NA>              <NA>        Decatur             30030
## 3                                            Atlanta             30312
## 4            Ste A2                          Atlanta             30306
## 5                                            Roswell             30076
## 6                                <NA>        Roswell             30076
##   location.country location.state
## 1               US             GA
## 2               US             GA
## 3               US             GA
## 4               US             GA
## 5               US             GA
## 6               US             GA
##                         location.display_address                   geometry
## 1          901 Main St, Stone Mountain, GA 30083 POINT (-84.16982 33.80586)
## 2              1707 Church St, Decatur, GA 30030 POINT (-84.28105 33.79757)
## 3           151 Sampson St NE, Atlanta, GA 30312 POINT (-84.36515 33.75833)
## 4 1167 Zonolite Pl NE, Ste A2, Atlanta, GA 30306  POINT (-84.34143 33.8051)
## 5               670 Houze Way, Roswell, GA 30076 POINT (-84.34202 34.04687)
## 6              720 Hembree Pl, Roswell, GA 30076 POINT (-84.31985 34.05956)
# Spatially join business attributes onto the tract polygons they intersect.
# The result keeps the tract geometry; tracts without a business are kept
# with NA business attributes (see the n_missing column in the summary below).
bikeRental_in_tract <- tract %>%
  st_join(yelp_in, join = st_intersects)
# Column-by-column summary of the joined layer
skim(bikeRental_in_tract)
## Warning: Couldn't find skimmers for class: sfc_MULTIPOLYGON, sfc; No user-
## defined `sfl` provided. Falling back to `character`.
Data summary
Name bikeRental_in_tract
Number of rows 349
Number of columns 39
_______________________
Column type frequency:
character 21
logical 1
numeric 17
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
GEOID 0 1.00 11 11 0 349 0
county 0 1.00 38 43 0 349 0
id 343 0.02 22 22 0 6 0
alias 343 0.02 24 28 0 6 0
name 343 0.02 12 20 0 6 0
image_url 343 0.02 68 68 0 6 0
url 343 0.02 181 185 0 6 0
categories 343 0.02 19 44 0 5 0
transactions 343 0.02 0 0 6 1 0
price 343 0.02 1 4 0 4 0
phone 343 0.02 12 12 0 6 0
display_phone 343 0.02 14 14 0 6 0
location.address1 343 0.02 11 19 0 6 0
location.address2 344 0.01 0 6 4 2 0
location.address3 345 0.01 0 0 4 1 0
location.city 343 0.02 7 14 0 4 0
location.zip_code 343 0.02 5 5 0 5 0
location.country 343 0.02 2 2 0 1 0
location.state 343 0.02 2 2 0 1 0
location.display_address 343 0.02 32 46 0 6 0
geometry 0 1.00 174 3684 0 349 0

Variable type: logical

skim_variable n_missing complete_rate mean count
is_closed 343 0.02 0 FAL: 6

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
hhincome 4 0.99 73209.99 41838.74 12485.00 42517.00 63269.00 92404.00 236149.00 ▇▆▂▁▁
race.tot 0 1.00 5116.11 2794.29 0.00 3242.00 4599.00 6224.00 21010.00 ▆▇▁▁▁
race.white 0 1.00 2061.66 1960.52 0.00 286.00 1618.00 3378.00 12764.00 ▇▅▁▁▁
race.black 0 1.00 2469.18 2750.80 0.00 658.00 1433.00 3215.00 19263.00 ▇▂▁▁▁
trans.total 0 1.00 2579.90 1461.48 0.00 1515.00 2426.00 3358.00 9813.00 ▆▇▂▁▁
trans.car 0 1.00 2062.77 1295.37 0.00 1105.00 1945.00 2726.00 8545.00 ▇▇▂▁▁
trans.drovealone 0 1.00 1846.89 1192.38 0.00 1005.00 1660.00 2454.00 8070.00 ▇▇▁▁▁
trans.carpooled 0 1.00 215.88 199.04 0.00 83.00 155.00 289.00 1304.00 ▇▂▁▁▁
trans.pubtrans 0 1.00 192.57 177.80 0.00 69.00 148.00 262.00 1158.00 ▇▃▁▁▁
trans.bicycle 0 1.00 12.11 29.40 0.00 0.00 0.00 11.00 225.00 ▇▁▁▁▁
trans.walk 0 1.00 58.21 114.16 0.00 0.00 23.00 61.00 889.00 ▇▁▁▁▁
trans.WfH 0 1.00 209.55 180.30 0.00 79.00 169.00 280.00 1218.00 ▇▃▁▁▁
Med_HHExp 0 1.00 1985.71 1061.13 0.00 1246.00 1837.00 2572.00 6723.00 ▅▇▃▁▁
med_RETaxes 19 0.95 2940.77 2190.89 199.00 1253.50 2326.00 4192.25 10001.00 ▇▅▃▁▁
review_count 343 0.02 55.33 43.08 1.00 27.75 59.00 68.50 124.00 ▅▁▇▁▂
rating 343 0.02 3.92 1.46 1.00 4.12 4.50 4.50 5.00 ▂▁▁▂▇
distance 343 0.02 5458.97 8525.16 1191.79 1626.19 2237.39 2694.70 22817.68 ▇▁▁▁▂
# Number of bike rental businesses per tract.
# bikeRental_in_tract keeps every tract, with NA business columns where no
# business matched, so counting rows would report 1 for empty tracts.
# Counting the non-missing business ids gives 0 for those tracts instead.
bikeRental_count_tract <- as_tibble(bikeRental_in_tract) %>%
  group_by(GEOID) %>%
  summarise(n = sum(!is.na(id))) %>%
  print()
## # A tibble: 349 × 2
##    GEOID           n
##    <chr>       <int>
##  1 13089020100     1
##  2 13089020200     1
##  3 13089020300     1
##  4 13089020400     1
##  5 13089020500     1
##  6 13089020600     1
##  7 13089020700     1
##  8 13089020801     1
##  9 13089020802     1
## 10 13089020900     1
## # … with 339 more rows
## # ℹ Use `print(n = ...)` to see more rows
# Join tract geometry with the number of Bike Rental businesses in tract.
# Each business carries count = 1; tracts with no matching business get NA
# from the join, which sum(..., na.rm = TRUE) turns into a count of 0.
test <- st_join(tract, yelp_in %>% mutate(count = 1))
out <- test %>%
  group_by(GEOID) %>%
  summarise(count = sum(count, na.rm = TRUE))

# Let's check whether the polygons and the point data match
tm_shape(out) + tm_polygons(col = "count") + tm_shape(yelp_in) + tm_dots()
# Attach the per-tract count to the tract layer. The geometry is removed from
# `out` first so this is a plain attribute join on GEOID.
FD_tract_Geom_bikeRental <- tract %>%
  left_join(out %>% st_set_geometry(NULL), by = "GEOID")
skim(FD_tract_Geom_bikeRental)
## Warning: Couldn't find skimmers for class: sfc_MULTIPOLYGON, sfc; No user-
## defined `sfl` provided. Falling back to `character`.
Data summary
Name FD_tract_Geom_bikeRental
Number of rows 349
Number of columns 18
_______________________
Column type frequency:
character 3
numeric 15
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
GEOID 0 1 11 11 0 349 0
county 0 1 38 43 0 349 0
geometry 0 1 174 3684 0 349 0

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
hhincome 4 0.99 73209.99 41838.74 12485 42517.0 63269 92404.00 236149 ▇▆▂▁▁
race.tot 0 1.00 5116.11 2794.29 0 3242.0 4599 6224.00 21010 ▆▇▁▁▁
race.white 0 1.00 2061.66 1960.52 0 286.0 1618 3378.00 12764 ▇▅▁▁▁
race.black 0 1.00 2469.18 2750.80 0 658.0 1433 3215.00 19263 ▇▂▁▁▁
trans.total 0 1.00 2579.90 1461.48 0 1515.0 2426 3358.00 9813 ▆▇▂▁▁
trans.car 0 1.00 2062.77 1295.37 0 1105.0 1945 2726.00 8545 ▇▇▂▁▁
trans.drovealone 0 1.00 1846.89 1192.38 0 1005.0 1660 2454.00 8070 ▇▇▁▁▁
trans.carpooled 0 1.00 215.88 199.04 0 83.0 155 289.00 1304 ▇▂▁▁▁
trans.pubtrans 0 1.00 192.57 177.80 0 69.0 148 262.00 1158 ▇▃▁▁▁
trans.bicycle 0 1.00 12.11 29.40 0 0.0 0 11.00 225 ▇▁▁▁▁
trans.walk 0 1.00 58.21 114.16 0 0.0 23 61.00 889 ▇▁▁▁▁
trans.WfH 0 1.00 209.55 180.30 0 79.0 169 280.00 1218 ▇▃▁▁▁
Med_HHExp 0 1.00 1985.71 1061.13 0 1246.0 1837 2572.00 6723 ▅▇▃▁▁
med_RETaxes 19 0.95 2940.77 2190.89 199 1253.5 2326 4192.25 10001 ▇▅▃▁▁
count 0 1.00 0.02 0.13 0 0.0 0 0.00 1 ▇▁▁▁▁
# Tract polygons coloured by business count, with the business points on top
tm_shape(FD_tract_Geom_bikeRental) +
  tm_polygons(col = "count") +
  tm_shape(yelp_in) +
  tm_dots()
# Print the column-by-column summary of the tract layer with counts
print(skim(FD_tract_Geom_bikeRental))
## Warning: Couldn't find skimmers for class: sfc_MULTIPOLYGON, sfc; No user-
## defined `sfl` provided. Falling back to `character`.
## ── Data Summary ────────────────────────
##                            Values                  
## Name                       FD_tract_Geom_bikeRental
## Number of rows             349                     
## Number of columns          18                      
## _______________________                            
## Column type frequency:                             
##   character                3                       
##   numeric                  15                      
## ________________________                           
## Group variables            None                    
## 
## ── Variable type: character ────────────────────────────────────────────────────
##   skim_variable n_missing complete_rate min  max empty n_unique whitespace
## 1 GEOID                 0             1  11   11     0      349          0
## 2 county                0             1  38   43     0      349          0
## 3 geometry              0             1 174 3684     0      349          0
## 
## ── Variable type: numeric ──────────────────────────────────────────────────────
##    skim_variable    n_missing complete_rate       mean        sd    p0    p25
##  1 hhincome                 4         0.989 73210.     41839.    12485 42517 
##  2 race.tot                 0         1      5116.      2794.        0  3242 
##  3 race.white               0         1      2062.      1961.        0   286 
##  4 race.black               0         1      2469.      2751.        0   658 
##  5 trans.total              0         1      2580.      1461.        0  1515 
##  6 trans.car                0         1      2063.      1295.        0  1105 
##  7 trans.drovealone         0         1      1847.      1192.        0  1005 
##  8 trans.carpooled          0         1       216.       199.        0    83 
##  9 trans.pubtrans           0         1       193.       178.        0    69 
## 10 trans.bicycle            0         1        12.1       29.4       0     0 
## 11 trans.walk               0         1        58.2      114.        0     0 
## 12 trans.WfH                0         1       210.       180.        0    79 
## 13 Med_HHExp                0         1      1986.      1061.        0  1246 
## 14 med_RETaxes             19         0.946  2941.      2191.      199  1254.
## 15 count                    0         1         0.0172     0.130     0     0 
##      p50    p75   p100 hist 
##  1 63269 92404  236149 ▇▆▂▁▁
##  2  4599  6224   21010 ▆▇▁▁▁
##  3  1618  3378   12764 ▇▅▁▁▁
##  4  1433  3215   19263 ▇▂▁▁▁
##  5  2426  3358    9813 ▆▇▂▁▁
##  6  1945  2726    8545 ▇▇▂▁▁
##  7  1660  2454    8070 ▇▇▁▁▁
##  8   155   289    1304 ▇▂▁▁▁
##  9   148   262    1158 ▇▃▁▁▁
## 10     0    11     225 ▇▁▁▁▁
## 11    23    61     889 ▇▁▁▁▁
## 12   169   280    1218 ▇▃▁▁▁
## 13  1837  2572    6723 ▅▇▃▁▁
## 14  2326  4192.  10001 ▇▅▃▁▁
## 15     0     0       1 ▇▁▁▁▁
## $character
## 
## ── Variable type: character ────────────────────────────────────────────────────
##   skim_variable n_missing complete_rate min  max empty n_unique whitespace
## 1 GEOID                 0             1  11   11     0      349          0
## 2 county                0             1  38   43     0      349          0
## 3 geometry              0             1 174 3684     0      349          0
## 
## $numeric
## 
## ── Variable type: numeric ──────────────────────────────────────────────────────
##    skim_…¹ n_mis…² compl…³    mean      sd    p0    p25   p50    p75   p100 hist
##  1 hhinco…       4   0.989 7.32e+4 4.18e+4 12485 42517  63269 92404  236149 ▇▆▂…
##  2 race.t…       0   1     5.12e+3 2.79e+3     0  3242   4599  6224   21010 ▆▇▁…
##  3 race.w…       0   1     2.06e+3 1.96e+3     0   286   1618  3378   12764 ▇▅▁…
##  4 race.b…       0   1     2.47e+3 2.75e+3     0   658   1433  3215   19263 ▇▂▁…
##  5 trans.…       0   1     2.58e+3 1.46e+3     0  1515   2426  3358    9813 ▆▇▂…
##  6 trans.…       0   1     2.06e+3 1.30e+3     0  1105   1945  2726    8545 ▇▇▂…
##  7 trans.…       0   1     1.85e+3 1.19e+3     0  1005   1660  2454    8070 ▇▇▁…
##  8 trans.…       0   1     2.16e+2 1.99e+2     0    83    155   289    1304 ▇▂▁…
##  9 trans.…       0   1     1.93e+2 1.78e+2     0    69    148   262    1158 ▇▃▁…
## 10 trans.…       0   1     1.21e+1 2.94e+1     0     0      0    11     225 ▇▁▁…
## 11 trans.…       0   1     5.82e+1 1.14e+2     0     0     23    61     889 ▇▁▁…
## 12 trans.…       0   1     2.10e+2 1.80e+2     0    79    169   280    1218 ▇▃▁…
## 13 Med_HH…       0   1     1.99e+3 1.06e+3     0  1246   1837  2572    6723 ▅▇▃…
## 14 med_RE…      19   0.946 2.94e+3 2.19e+3   199  1254.  2326  4192.  10001 ▇▅▃…
## 15 count         0   1     1.72e-2 1.30e-1     0     0      0     0       1 ▇▁▁…
## # … with abbreviated variable names ¹​skim_variable, ²​n_missing, ³​complete_rate
# Keep only the tracts whose hhincome is not missing
bikeRental_census_dropnaHH2 <- FD_tract_Geom_bikeRental[
  complete.cases(FD_tract_Geom_bikeRental$hhincome),
]

# Summarise again to confirm hhincome has no missing values left
print(skim(bikeRental_census_dropnaHH2))
## Warning: Couldn't find skimmers for class: sfc_MULTIPOLYGON, sfc; No user-
## defined `sfl` provided. Falling back to `character`.
## ── Data Summary ────────────────────────
##                            Values                      
## Name                       bikeRental_census_dropnaH...
## Number of rows             345                         
## Number of columns          18                          
## _______________________                                
## Column type frequency:                                 
##   character                3                           
##   numeric                  15                          
## ________________________                               
## Group variables            None                        
## 
## ── Variable type: character ────────────────────────────────────────────────────
##   skim_variable n_missing complete_rate min  max empty n_unique whitespace
## 1 GEOID                 0             1  11   11     0      345          0
## 2 county                0             1  38   43     0      345          0
## 3 geometry              0             1 174 3684     0      345          0
## 
## ── Variable type: numeric ──────────────────────────────────────────────────────
##    skim_variable    n_missing complete_rate       mean        sd    p0    p25
##  1 hhincome                 0         1     73210.     41839.    12485 42517 
##  2 race.tot                 0         1      5162.      2774.      196  3344 
##  3 race.white               0         1      2083.      1961.        0   300 
##  4 race.black               0         1      2488.      2759.       19   671 
##  5 trans.total              0         1      2610.      1443.       54  1560 
##  6 trans.car                0         1      2087.      1284.       54  1128 
##  7 trans.drovealone         0         1      1868.      1182.       54  1017 
##  8 trans.carpooled          0         1       218.       199.        0    86 
##  9 trans.pubtrans           0         1       195.       178.        0    71 
## 10 trans.bicycle            0         1        12.2       29.5       0     0 
## 11 trans.walk               0         1        58.9      115.        0     1 
## 12 trans.WfH                0         1       212.       180.        0    82 
## 13 Med_HHExp                0         1      2009.      1045.      164  1261 
## 14 med_RETaxes             15         0.957  2941.      2191.      199  1254.
## 15 count                    0         1         0.0174     0.131     0     0 
##      p50    p75   p100 hist 
##  1 63269 92404  236149 ▇▆▂▁▁
##  2  4693  6236   21010 ▇▇▁▁▁
##  3  1652  3383   12764 ▇▅▁▁▁
##  4  1456  3215   19263 ▇▂▁▁▁
##  5  2436  3366    9813 ▆▇▂▁▁
##  6  1958  2731    8545 ▇▇▂▁▁
##  7  1669  2467    8070 ▇▇▁▁▁
##  8   160   289    1304 ▇▂▁▁▁
##  9   149   263    1158 ▇▃▁▁▁
## 10     0    11     225 ▇▁▁▁▁
## 11    23    61     889 ▇▁▁▁▁
## 12   171   282    1218 ▇▃▁▁▁
## 13  1847  2577    6723 ▆▇▃▁▁
## 14  2326  4192.  10001 ▇▅▃▁▁
## 15     0     0       1 ▇▁▁▁▁
## $character
## 
## ── Variable type: character ────────────────────────────────────────────────────
##   skim_variable n_missing complete_rate min  max empty n_unique whitespace
## 1 GEOID                 0             1  11   11     0      345          0
## 2 county                0             1  38   43     0      345          0
## 3 geometry              0             1 174 3684     0      345          0
## 
## $numeric
## 
## ── Variable type: numeric ──────────────────────────────────────────────────────
##    skim_…¹ n_mis…² compl…³    mean      sd    p0    p25   p50    p75   p100 hist
##  1 hhinco…       0   1     7.32e+4 4.18e+4 12485 42517  63269 92404  236149 ▇▆▂…
##  2 race.t…       0   1     5.16e+3 2.77e+3   196  3344   4693  6236   21010 ▇▇▁…
##  3 race.w…       0   1     2.08e+3 1.96e+3     0   300   1652  3383   12764 ▇▅▁…
##  4 race.b…       0   1     2.49e+3 2.76e+3    19   671   1456  3215   19263 ▇▂▁…
##  5 trans.…       0   1     2.61e+3 1.44e+3    54  1560   2436  3366    9813 ▆▇▂…
##  6 trans.…       0   1     2.09e+3 1.28e+3    54  1128   1958  2731    8545 ▇▇▂…
##  7 trans.…       0   1     1.87e+3 1.18e+3    54  1017   1669  2467    8070 ▇▇▁…
##  8 trans.…       0   1     2.18e+2 1.99e+2     0    86    160   289    1304 ▇▂▁…
##  9 trans.…       0   1     1.95e+2 1.78e+2     0    71    149   263    1158 ▇▃▁…
## 10 trans.…       0   1     1.22e+1 2.95e+1     0     0      0    11     225 ▇▁▁…
## 11 trans.…       0   1     5.89e+1 1.15e+2     0     1     23    61     889 ▇▁▁…
## 12 trans.…       0   1     2.12e+2 1.80e+2     0    82    171   282    1218 ▇▃▁…
## 13 Med_HH…       0   1     2.01e+3 1.05e+3   164  1261   1847  2577    6723 ▆▇▃…
## 14 med_RE…      15   0.957 2.94e+3 2.19e+3   199  1254.  2326  4192.  10001 ▇▅▃…
## 15 count         0   1     1.74e-2 1.31e-1     0     0      0     0       1 ▇▁▁…
## # … with abbreviated variable names ¹​skim_variable, ²​n_missing, ³​complete_rate
# Keep only the tracts that also have a real-estate-tax value (med_RETaxes),
# then summarise the result to confirm no missing values remain.
y_census_dropnaHHTX <- bikeRental_census_dropnaHH2[
  !is.na(bikeRental_census_dropnaHH2[["med_RETaxes"]]),
]
skim(y_census_dropnaHHTX)
## Warning: Couldn't find skimmers for class: sfc_MULTIPOLYGON, sfc; No user-
## defined `sfl` provided. Falling back to `character`.
Data summary
Name y_census_dropnaHHTX
Number of rows 330
Number of columns 18
_______________________
Column type frequency:
character 3
numeric 15
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
GEOID 0 1 11 11 0 330 0
county 0 1 38 43 0 330 0
geometry 0 1 174 3684 0 330 0

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
hhincome 0 1 75223.13 41645.99 12485 44023.25 66595.0 94728.00 236149 ▇▇▂▂▁
race.tot 0 1 5276.50 2757.89 1254 3391.75 4886.0 6370.75 21010 ▇▆▁▁▁
race.white 0 1 2154.37 1968.38 0 312.25 1739.5 3524.25 12764 ▇▅▁▁▁
race.black 0 1 2521.78 2806.73 19 661.25 1444.5 3361.25 19263 ▇▂▁▁▁
trans.total 0 1 2681.46 1429.68 421 1647.25 2476.5 3405.75 9813 ▇▇▂▁▁
trans.car 0 1 2152.82 1270.68 246 1231.75 2017.0 2784.25 8545 ▇▇▁▁▁
trans.drovealone 0 1 1930.55 1169.36 108 1067.25 1753.5 2534.50 8070 ▇▇▁▁▁
trans.carpooled 0 1 222.26 199.16 0 88.50 161.5 294.75 1304 ▇▂▁▁▁
trans.pubtrans 0 1 195.22 177.66 0 71.00 150.0 262.75 1158 ▇▃▁▁▁
trans.bicycle 0 1 12.34 29.84 0 0.00 0.0 12.00 225 ▇▁▁▁▁
trans.walk 0 1 56.37 106.29 0 1.00 23.5 61.00 889 ▇▁▁▁▁
trans.WfH 0 1 218.59 180.20 0 89.25 177.0 286.50 1218 ▇▃▁▁▁
Med_HHExp 0 1 2064.01 1030.49 347 1319.50 1922.5 2608.75 6723 ▇▇▂▁▁
med_RETaxes 0 1 2940.77 2190.89 199 1253.50 2326.0 4192.25 10001 ▇▅▃▁▁
count 0 1 0.02 0.13 0 0.00 0.0 0.00 1 ▇▁▁▁▁
# Inspect the class of the filtered object.
class(y_census_dropnaHHTX)
## [1] "sf"         "data.frame"
# Scatter plot of the number of bike rental businesses in each tract (count)
# against the tract's household median income (hhincome). labs() puts the
# descriptive text in the title and names each axis for the variable it shows;
# ylab() alone would leave the x axis labelled with the raw column name.
ggplot(y_census_dropnaHHTX, aes(x = hhincome, y = count)) +
  geom_point() +
  labs(
    title = "Number of Bike Rental Businesses and Household Median Income in Tract",
    x = "Household median income",
    y = "Number of bike rental businesses"
  )

# Binary indicator: 1 when the tract has at least one bike rental business
# (count > 0), 0 otherwise.
y_census_dropnaHHTX$Bikerentals <- with(
  y_census_dropnaHHTX,
  ifelse(count > 0, 1, 0)
)

# Household median income, split by the presence indicator.
boxplot(
  hhincome ~ Bikerentals,
  data = y_census_dropnaHHTX,
  main = "Boxplot of Bike Rental Businesses by Income",
  xlab = "Whether Bike Rental Businesses are present",
  ylab = "Household median income"
)

# Real estate taxes, split by the same indicator.
boxplot(
  med_RETaxes ~ Bikerentals,
  data = y_census_dropnaHHTX,
  main = "Boxplot of Bike Rental Businesses by Real Estate Taxes",
  xlab = "Whether Bike Rental Businesses are present",
  ylab = "Real Estate Taxes in tract"
)

# Disabled scatter-plot alternative, kept for reference:
# ggplot(y_census_dropnaHHTX, aes(x=count, y=trans.bicycle)) +
#   geom_point() +
#   ylab("Number of Bike Commuters")

# Bike commuter counts (trans.bicycle), split by the presence indicator.
boxplot(
  trans.bicycle ~ Bikerentals,
  data = y_census_dropnaHHTX,
  main = "Boxplot of Bike Rental Businesses by Bike Commuters",
  xlab = "Bike Rental Businesses are present",
  ylab = "Bike Commuters"
)

# Population density: total population (race.tot) divided by tract area and
# scaled by 1000.
# NOTE(review): st_area() reports area in the CRS's units (presumably square
# metres), so this would be people per 1000 m^2 -- confirm the intended unit.
y_census_dropnaHHTX$pop_density <- with(
  y_census_dropnaHHTX,
  1000 * race.tot / st_area(geometry)
)

# Bike commuters as a percentage of all commuters in the tract.
y_census_dropnaHHTX$bike_commuter_proportion <- with(
  y_census_dropnaHHTX,
  100 * trans.bicycle / trans.total
)

# Population density, split by the presence indicator.
boxplot(
  pop_density ~ Bikerentals,
  data = y_census_dropnaHHTX,
  main = "Boxplot of Bike Rentals by population density of a tract",
  xlab = "Presence Bike Rental Businesses",
  ylab = "Population Density"
)

# Percentage of bike commuters, split by the same indicator.
boxplot(
  bike_commuter_proportion ~ Bikerentals,
  data = y_census_dropnaHHTX,
  main = "Boxplot of Bike Rentals by percentage of Bike Commuters",
  xlab = "Presence of Bike Rental Businesses",
  ylab = "percentage of people using bicycle to commute"
)

# Switch tmap to its interactive viewing mode.
tmap_mode("view")
## tmap mode set to interactive viewing
# Two maps with the same layout: tract polygons coloured by a derived
# variable, with the points in yelp_in drawn on top as dots.
# NOTE(review): yelp_in is presumably the set of Yelp bike-rental businesses
# inside the study area -- confirm against where it is built.
t_pop_den <- tm_shape(y_census_dropnaHHTX) +
  tm_polygons(col = "pop_density") +
  tm_shape(yelp_in) +
  tm_dots()

t_bike_commute <- tm_shape(y_census_dropnaHHTX) +
  tm_polygons(col = "bike_commuter_proportion") +
  tm_shape(yelp_in) +
  tm_dots()

# Show both maps together.
tmap_arrange(t_pop_den, t_bike_commute)