#KP Wells# #Urban Analytics# #Mini Assignment 3# #10/02/22#
This time, we’re going to use some basic analyses to select the optimum location for a bike rental store in Fulton and DeKalb counties using the Yelp! API and Census data. I will base my selection on 1.) demographic data that indicates a market for bike rentals and 2.) areas where there is a gap in that market (i.e. no bike rental places around). I’ll also be doing some simple exploratory analyses to verify what the data is telling me.
#As always, let's start off by calling some libraries we will probably need.
library(tidycensus)
library(sf)
## Linking to GEOS 3.9.1, GDAL 3.4.3, PROJ 7.2.1; sf_use_s2() is TRUE
library(tmap)
library(jsonlite)
library(tidyverse)
## ── Attaching packages
## ───────────────────────────────────────
## tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6 ✔ purrr 0.3.4
## ✔ tibble 3.1.8 ✔ dplyr 1.0.9
## ✔ tidyr 1.2.0 ✔ stringr 1.4.0
## ✔ readr 2.1.2 ✔ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ purrr::flatten() masks jsonlite::flatten()
## ✖ dplyr::lag() masks stats::lag()
library(httr)
library(jsonlite)
library(reshape2)
##
## Attaching package: 'reshape2'
##
## The following object is masked from 'package:tidyr':
##
## smiths
library(here)
## here() starts at C:/Users/kwells65/OneDrive - Georgia Institute of Technology/Assignments
library(yelpr)
library(knitr)
library(ggplot2)
#Next, I want to define some settings and call my census API
suppressMessages(census_api_key(Sys.getenv("census_api")))
#Step One: Preparing Census Data
Let me start by downloading and preparing the Census data for Fulton and DeKalb.
ful_dek_tracts19 <- suppressMessages(
get_acs(geography = "tract",
state = "GA",
county = c("Fulton", "Dekalb"),
variables = c(hhincome = 'B19019_001',
population = "B01003_001",
trans.bicycle = "B08006_014",
trans.walk = "B08006_015"),
year = 2019,
survey = "acs5",
geometry = TRUE,
output = "wide"))
##
|
| | 0%
|
|= | 1%
|
|= | 2%
|
|== | 3%
|
|=== | 4%
|
|==== | 5%
|
|==== | 6%
|
|===== | 7%
|
|====== | 8%
|
|====== | 9%
|
|======= | 9%
|
|======= | 10%
|
|======== | 11%
|
|========= | 12%
|
|========= | 13%
|
|========= | 14%
|
|========== | 14%
|
|========== | 15%
|
|=========== | 15%
|
|=========== | 16%
|
|============ | 17%
|
|============ | 18%
|
|============= | 18%
|
|============== | 19%
|
|============== | 20%
|
|=============== | 21%
|
|=============== | 22%
|
|================ | 23%
|
|================= | 25%
|
|================== | 26%
|
|=================== | 27%
|
|==================== | 28%
|
|==================== | 29%
|
|===================== | 30%
|
|====================== | 31%
|
|====================== | 32%
|
|======================= | 32%
|
|======================= | 33%
|
|======================== | 34%
|
|========================= | 35%
|
|========================= | 36%
|
|========================== | 37%
|
|=========================== | 38%
|
|============================ | 40%
|
|============================= | 41%
|
|============================== | 42%
|
|=============================== | 44%
|
|================================ | 45%
|
|================================ | 46%
|
|================================= | 46%
|
|================================= | 48%
|
|================================== | 49%
|
|=================================== | 51%
|
|==================================== | 52%
|
|===================================== | 53%
|
|====================================== | 55%
|
|======================================= | 56%
|
|======================================== | 57%
|
|========================================= | 58%
|
|========================================= | 59%
|
|========================================== | 60%
|
|=========================================== | 61%
|
|============================================ | 63%
|
|============================================= | 64%
|
|============================================== | 65%
|
|=============================================== | 67%
|
|================================================ | 68%
|
|================================================= | 69%
|
|================================================== | 71%
|
|=================================================== | 72%
|
|=================================================== | 74%
|
|==================================================== | 75%
|
|===================================================== | 76%
|
|====================================================== | 78%
|
|======================================================= | 79%
|
|======================================================== | 80%
|
|========================================================= | 82%
|
|========================================================== | 83%
|
|=========================================================== | 84%
|
|============================================================ | 86%
|
|============================================================= | 87%
|
|============================================================== | 88%
|
|=============================================================== | 90%
|
|================================================================ | 91%
|
|================================================================= | 92%
|
|================================================================== | 94%
|
|=================================================================== | 95%
|
|==================================================================== | 97%
|
|===================================================================== | 98%
|
|===================================================================== | 99%
|
|======================================================================| 100%
#I want to look at cyclists, but also identified pedestrians as a likely group to occasionally rent a bicycle, so I included them as well.
#check the output before moving on
message(sprintf("nrow: %s, ncol: %s", nrow(ful_dek_tracts19), ncol(ful_dek_tracts19)))
## nrow: 349, ncol: 11
ful_dek_tracts19 %>% head() %>% knitr::kable()
| GEOID | NAME | hhincomeE | hhincomeM | populationE | populationM | trans.bicycleE | trans.bicycleM | trans.walkE | trans.walkM | geometry |
|---|---|---|---|---|---|---|---|---|---|---|
| 13121001100 | Census Tract 11, Fulton County, Georgia | 109426 | 7959 | 5193 | 453 | 71 | 51 | 632 | 192 | MULTIPOLYGON (((-84.38782 3… |
| 13121009603 | Census Tract 96.03, Fulton County, Georgia | 83243 | 15103 | 4830 | 332 | 0 | 13 | 22 | 37 | MULTIPOLYGON (((-84.38738 3… |
| 13121005800 | Census Tract 58, Fulton County, Georgia | 42679 | 16189 | 1479 | 250 | 12 | 13 | 43 | 41 | MULTIPOLYGON (((-84.41692 3… |
| 13121010117 | Census Tract 101.17, Fulton County, Georgia | 60731 | 5573 | 4319 | 488 | 24 | 33 | 15 | 19 | MULTIPOLYGON (((-84.36575 3… |
| 13121009502 | Census Tract 95.02, Fulton County, Georgia | 86053 | 24545 | 3867 | 497 | 0 | 13 | 62 | 50 | MULTIPOLYGON (((-84.39472 3… |
| 13089021213 | Census Tract 212.13, DeKalb County, Georgia | 154063 | 19674 | 3526 | 204 | 0 | 13 | 0 | 13 | MULTIPOLYGON (((-84.34783 3… |
#Now I'm going to select what I want and create some variables.
FD_tracts19 <- ful_dek_tracts19 %>%
select(GEOID,
hhincome19 = hhincomeE, # New name = old name
population19 = populationE,
trans.bicycle19 = trans.bicycleE,
trans.walk19 = trans.walkE) %>%
mutate(ptrans.bicycle19 =trans.bicycle19/population19,
ptrans.walk19 = trans.walk19/population19)
#I also want to see where there is an increase in the number of people cycling to work as this might indicate an increasing demand in bike rentals, so let's get more than one year to calculate change.
ful_dek_tracts14 <- suppressMessages(
get_acs(geography = "tract",
state = "GA",
county = c("Fulton", "Dekalb"),
variables = c(hhincome14 = 'B19019_001',
population14 = "B01003_001",
trans.bicycle14 = "B08006_014",
trans.walk14 = "B08006_015"),
year = 2014,
survey = "acs5",
geometry = TRUE,
output = "wide"))
##
|
| | 0%
|
|= | 1%
|
|== | 3%
|
|=== | 4%
|
|==== | 5%
|
|==== | 6%
|
|===== | 8%
|
|====== | 9%
|
|======= | 10%
|
|======== | 11%
|
|========= | 13%
|
|========== | 14%
|
|=========== | 15%
|
|=========== | 16%
|
|============ | 18%
|
|============= | 19%
|
|============== | 20%
|
|=============== | 21%
|
|================ | 23%
|
|================= | 24%
|
|================== | 25%
|
|================== | 26%
|
|=================== | 28%
|
|==================== | 28%
|
|==================== | 29%
|
|===================== | 30%
|
|====================== | 31%
|
|======================= | 32%
|
|======================= | 33%
|
|======================== | 34%
|
|========================= | 35%
|
|========================= | 36%
|
|========================== | 37%
|
|========================== | 38%
|
|=========================== | 38%
|
|=========================== | 39%
|
|============================ | 40%
|
|============================= | 41%
|
|============================== | 42%
|
|============================== | 43%
|
|=============================== | 44%
|
|================================ | 45%
|
|================================ | 46%
|
|================================= | 48%
|
|================================== | 49%
|
|=================================== | 50%
|
|==================================== | 51%
|
|===================================== | 53%
|
|====================================== | 54%
|
|====================================== | 55%
|
|======================================= | 55%
|
|======================================= | 56%
|
|======================================== | 58%
|
|========================================= | 59%
|
|========================================== | 60%
|
|=========================================== | 61%
|
|============================================ | 63%
|
|============================================= | 64%
|
|============================================== | 65%
|
|=============================================== | 66%
|
|=============================================== | 68%
|
|================================================ | 69%
|
|================================================= | 70%
|
|================================================== | 71%
|
|=================================================== | 73%
|
|==================================================== | 74%
|
|===================================================== | 75%
|
|====================================================== | 76%
|
|====================================================== | 78%
|
|======================================================= | 79%
|
|======================================================== | 80%
|
|========================================================= | 81%
|
|========================================================== | 83%
|
|=========================================================== | 84%
|
|============================================================ | 85%
|
|============================================================= | 86%
|
|============================================================= | 88%
|
|============================================================== | 89%
|
|=============================================================== | 90%
|
|================================================================ | 91%
|
|================================================================= | 93%
|
|================================================================== | 94%
|
|=================================================================== | 95%
|
|==================================================================== | 96%
|
|==================================================================== | 98%
|
|===================================================================== | 99%
|
|======================================================================| 100%
#check the output before moving on
message(sprintf("nrow: %s, ncol: %s", nrow(ful_dek_tracts14), ncol(ful_dek_tracts14)))
## nrow: 349, ncol: 11
ful_dek_tracts14 %>% head() %>% knitr::kable()
| GEOID | NAME | hhincome14E | hhincome14M | population14E | population14M | trans.bicycle14E | trans.bicycle14M | trans.walk14E | trans.walk14M | geometry |
|---|---|---|---|---|---|---|---|---|---|---|
| 13089020500 | Census Tract 205, DeKalb County, Georgia | 45760 | 8734 | 2922 | 341 | 30 | 26 | 19 | 31 | MULTIPOLYGON (((-84.34919 3… |
| 13089021208 | Census Tract 212.08, DeKalb County, Georgia | 58935 | 10987 | 4155 | 379 | 0 | 13 | 21 | 25 | MULTIPOLYGON (((-84.32248 3… |
| 13089021415 | Census Tract 214.15, DeKalb County, Georgia | 80913 | 15234 | 4020 | 435 | 0 | 13 | 75 | 62 | MULTIPOLYGON (((-84.34836 3… |
| 13089021814 | Census Tract 218.14, DeKalb County, Georgia | 33520 | 4466 | 5945 | 695 | 0 | 18 | 57 | 63 | MULTIPOLYGON (((-84.24736 3… |
| 13089022700 | Census Tract 227, DeKalb County, Georgia | 92000 | 13716 | 4517 | 300 | 20 | 11 | 82 | 40 | MULTIPOLYGON (((-84.31249 3… |
| 13089023111 | Census Tract 231.11, DeKalb County, Georgia | 50223 | 11003 | 1935 | 235 | 0 | 13 | 0 | 13 | MULTIPOLYGON (((-84.23339 3… |
FD_tracts14 <- ful_dek_tracts14 %>%
select(GEOID,
hhincome14 = hhincome14E, # New name = old name
population14 = population14E,
trans.bicycle14 = trans.bicycle14E,
trans.walk14 = trans.walk14E) %>%
mutate(ptrans.bicycle14 =trans.bicycle14/population14,
ptrans.walk14 = trans.walk14/population14)
#Now we combine our two dfs and calculate change
FD_tracts19_chg <- st_join(FD_tracts14, FD_tracts19)%>%
mutate(trans.bicyclechg = trans.bicycle19-trans.bicycle14,
trans.walkchg = trans.walk19-trans.walk14)
#Looks like the join created duplicate records. Let's clean that up.
FD_tracts19_2 <- FD_tracts19_chg %>%
distinct(geometry, .keep_all=T)
#Much better!
#Let's check it
tmap_mode("view")
## tmap mode set to interactive viewing
tm_shape(FD_tracts19_2) + tm_borders()
#Step Two: Drawing the Bounding Box
In this block of code, we’re going to get the bounding box of our polygon, get lat/long coordinates of any one of the corners of that bounding box and get the centroid of the bounding box (bb).
get_r <- function(poly, epsg_id){
bb <- st_bbox(poly)
bb_corner <- st_point(c(bb[1], bb[2])) %>% st_sfc(crs = epsg_id)
bb_center_x <- (bb[3]+bb[1])/2
bb_center_y <- (bb[4]+bb[2])/2
bb_center <- st_point(c(bb_center_x, bb_center_y)) %>% st_sfc(crs = epsg_id) %>% st_sf()
#Next, I'm going to get the distance between bb_p and c and multiply it by 1.1 to make the circle a bit bigger than the tracts
r <- st_distance(bb_corner, bb_center)
bb_center$radius <- r*1.2
return(bb_center)
}
#now, let's apply the function to each of our polygons using a for loop
#first, we create an empty vector for our results to fill
epsg_id <- 4326 #NOTE: 4326 measures distance in meters. You can also use 26967.
r4all_loop <- vector("list", nrow(FD_tracts19_2))
#for loop starts here
for (i in 1:nrow(FD_tracts19_2)){
r4all_loop[[i]] <- FD_tracts19_2 %>%
st_transform(crs = epsg_id) %>%
st_geometry() %>%
.[[i]] %>%
get_r(epsg_id = epsg_id)
}
r4all_loop <- bind_rows(r4all_loop)
#Now, I just need to append my x/y coordinates
ready_4_yelp <- r4all_loop %>%
mutate(x = st_coordinates(.)[,1],
y = st_coordinates(.)[,2])
#Step Three: Defining the Function
The purpose of this block of code is to get one row of tract information (1,) and category name (str), The output is a list of businesses.
get_yelp <- function(tract, category){
n <- 1
resp <- business_search(api_key = Sys.getenv("yelp_api"),
categories = category,
latitude = tract$y,
longitude = tract$x,
offset = (n - 1) * 50, # = 0 when n = 1
radius = round(tract$radius),
limit = 50)
required_n <- ceiling(resp$total/50)
out <- vector("list", required_n)
#'out' is where our results will be appended to.
# Store the business information to nth slot in out
out[[n]] <- resp$businesses
# Next, I need to change the name of the elements to the total required_n
# this is to know if there are more than 1000 businesses,we know how many.
names(out)[n] <- required_n
#throw error if more than 1000
if (resp$total >= 1000)
{
#glue formats strings of text by inserting {n} with what's currently stored in object n.
print(glue::glue("{n}th row has >= 1000 businesses."))
#now, we need to stop before going into the loop because we need to break down Census Tract to something smaller.
return(out)
}
else
{
# add 1 to n
n <- n + 1
#Now all that's left is to write my 'while' loop
while(n <= required_n){
resp <- business_search(api_key = Sys.getenv("yelp_api"),
categories = category,
latitude = tract$y,
longitude = tract$x,
offset = (n - 1) * 50,
radius = round(tract$radius),
limit = 50)
out[[n]] <- resp$businesses
n <- n + 1
} #<< this signifies the end of the while loop
#Finally, we merge all elements in the list into a single data frame
ful_dek_out <- out %>% bind_rows()
return(ful_dek_out)
}
}
#Step Four: Applying the Function
First, I’ll prepare my collector before looping through my census tracts.
yelp_bike_list <- vector("list", nrow(ready_4_yelp))
#loops through all the census tracts for bike rentals
for (row in 1:nrow(ready_4_yelp)){
yelp_bike_list[[row]] <- suppressWarnings(suppressMessages(get_yelp(ready_4_yelp[row,], "bikerentals")))
if (row %% 1 == 0){
Sys.sleep(1)
print(paste0("Current row: ", row))
}
}
## [1] "Current row: 1"
## [1] "Current row: 2"
## [1] "Current row: 3"
## [1] "Current row: 4"
## [1] "Current row: 5"
## [1] "Current row: 6"
## [1] "Current row: 7"
## [1] "Current row: 8"
## [1] "Current row: 9"
## [1] "Current row: 10"
## [1] "Current row: 11"
## [1] "Current row: 12"
## [1] "Current row: 13"
## [1] "Current row: 14"
## [1] "Current row: 15"
## [1] "Current row: 16"
## [1] "Current row: 17"
## [1] "Current row: 18"
## [1] "Current row: 19"
## [1] "Current row: 20"
## [1] "Current row: 21"
## [1] "Current row: 22"
## [1] "Current row: 23"
## [1] "Current row: 24"
## [1] "Current row: 25"
## [1] "Current row: 26"
## [1] "Current row: 27"
## [1] "Current row: 28"
## [1] "Current row: 29"
## [1] "Current row: 30"
## [1] "Current row: 31"
## [1] "Current row: 32"
## [1] "Current row: 33"
## [1] "Current row: 34"
## [1] "Current row: 35"
## [1] "Current row: 36"
## [1] "Current row: 37"
## [1] "Current row: 38"
## [1] "Current row: 39"
## [1] "Current row: 40"
## [1] "Current row: 41"
## [1] "Current row: 42"
## [1] "Current row: 43"
## [1] "Current row: 44"
## [1] "Current row: 45"
## [1] "Current row: 46"
## [1] "Current row: 47"
## [1] "Current row: 48"
## [1] "Current row: 49"
## [1] "Current row: 50"
## [1] "Current row: 51"
## [1] "Current row: 52"
## [1] "Current row: 53"
## [1] "Current row: 54"
## [1] "Current row: 55"
## [1] "Current row: 56"
## [1] "Current row: 57"
## [1] "Current row: 58"
## [1] "Current row: 59"
## [1] "Current row: 60"
## [1] "Current row: 61"
## [1] "Current row: 62"
## [1] "Current row: 63"
## [1] "Current row: 64"
## [1] "Current row: 65"
## [1] "Current row: 66"
## [1] "Current row: 67"
## [1] "Current row: 68"
## [1] "Current row: 69"
## [1] "Current row: 70"
## [1] "Current row: 71"
## [1] "Current row: 72"
## [1] "Current row: 73"
## [1] "Current row: 74"
## [1] "Current row: 75"
## [1] "Current row: 76"
## [1] "Current row: 77"
## [1] "Current row: 78"
## [1] "Current row: 79"
## [1] "Current row: 80"
## [1] "Current row: 81"
## [1] "Current row: 82"
## [1] "Current row: 83"
## [1] "Current row: 84"
## [1] "Current row: 85"
## [1] "Current row: 86"
## [1] "Current row: 87"
## [1] "Current row: 88"
## [1] "Current row: 89"
## [1] "Current row: 90"
## [1] "Current row: 91"
## [1] "Current row: 92"
## [1] "Current row: 93"
## [1] "Current row: 94"
## [1] "Current row: 95"
## [1] "Current row: 96"
## [1] "Current row: 97"
## [1] "Current row: 98"
## [1] "Current row: 99"
## [1] "Current row: 100"
## [1] "Current row: 101"
## [1] "Current row: 102"
## [1] "Current row: 103"
## [1] "Current row: 104"
## [1] "Current row: 105"
## [1] "Current row: 106"
## [1] "Current row: 107"
## [1] "Current row: 108"
## [1] "Current row: 109"
## [1] "Current row: 110"
## [1] "Current row: 111"
## [1] "Current row: 112"
## [1] "Current row: 113"
## [1] "Current row: 114"
## [1] "Current row: 115"
## [1] "Current row: 116"
## [1] "Current row: 117"
## [1] "Current row: 118"
## [1] "Current row: 119"
## [1] "Current row: 120"
## [1] "Current row: 121"
## [1] "Current row: 122"
## [1] "Current row: 123"
## [1] "Current row: 124"
## [1] "Current row: 125"
## [1] "Current row: 126"
## [1] "Current row: 127"
## [1] "Current row: 128"
## [1] "Current row: 129"
## [1] "Current row: 130"
## [1] "Current row: 131"
## [1] "Current row: 132"
## [1] "Current row: 133"
## [1] "Current row: 134"
## [1] "Current row: 135"
## [1] "Current row: 136"
## [1] "Current row: 137"
## [1] "Current row: 138"
## [1] "Current row: 139"
## [1] "Current row: 140"
## [1] "Current row: 141"
## [1] "Current row: 142"
## [1] "Current row: 143"
## [1] "Current row: 144"
## [1] "Current row: 145"
## [1] "Current row: 146"
## [1] "Current row: 147"
## [1] "Current row: 148"
## [1] "Current row: 149"
## [1] "Current row: 150"
## [1] "Current row: 151"
## [1] "Current row: 152"
## [1] "Current row: 153"
## [1] "Current row: 154"
## [1] "Current row: 155"
## [1] "Current row: 156"
## [1] "Current row: 157"
## [1] "Current row: 158"
## [1] "Current row: 159"
## [1] "Current row: 160"
## [1] "Current row: 161"
## [1] "Current row: 162"
## [1] "Current row: 163"
## [1] "Current row: 164"
## [1] "Current row: 165"
## [1] "Current row: 166"
## [1] "Current row: 167"
## [1] "Current row: 168"
## [1] "Current row: 169"
## [1] "Current row: 170"
## [1] "Current row: 171"
## [1] "Current row: 172"
## [1] "Current row: 173"
## [1] "Current row: 174"
## [1] "Current row: 175"
## [1] "Current row: 176"
## [1] "Current row: 177"
## [1] "Current row: 178"
## [1] "Current row: 179"
## [1] "Current row: 180"
## [1] "Current row: 181"
## [1] "Current row: 182"
## [1] "Current row: 183"
## [1] "Current row: 184"
## [1] "Current row: 185"
## [1] "Current row: 186"
## [1] "Current row: 187"
## [1] "Current row: 188"
## [1] "Current row: 189"
## [1] "Current row: 190"
## [1] "Current row: 191"
## [1] "Current row: 192"
## [1] "Current row: 193"
## [1] "Current row: 194"
## [1] "Current row: 195"
## [1] "Current row: 196"
## [1] "Current row: 197"
## [1] "Current row: 198"
## [1] "Current row: 199"
## [1] "Current row: 200"
## [1] "Current row: 201"
## [1] "Current row: 202"
## [1] "Current row: 203"
## [1] "Current row: 204"
## [1] "Current row: 205"
## [1] "Current row: 206"
## [1] "Current row: 207"
## [1] "Current row: 208"
## [1] "Current row: 209"
## [1] "Current row: 210"
## [1] "Current row: 211"
## [1] "Current row: 212"
## [1] "Current row: 213"
## [1] "Current row: 214"
## [1] "Current row: 215"
## [1] "Current row: 216"
## [1] "Current row: 217"
## [1] "Current row: 218"
## [1] "Current row: 219"
## [1] "Current row: 220"
## [1] "Current row: 221"
## [1] "Current row: 222"
## [1] "Current row: 223"
## [1] "Current row: 224"
## [1] "Current row: 225"
## [1] "Current row: 226"
## [1] "Current row: 227"
## [1] "Current row: 228"
## [1] "Current row: 229"
## [1] "Current row: 230"
## [1] "Current row: 231"
## [1] "Current row: 232"
## [1] "Current row: 233"
## [1] "Current row: 234"
## [1] "Current row: 235"
## [1] "Current row: 236"
## [1] "Current row: 237"
## [1] "Current row: 238"
## [1] "Current row: 239"
## [1] "Current row: 240"
## [1] "Current row: 241"
## [1] "Current row: 242"
## [1] "Current row: 243"
## [1] "Current row: 244"
## [1] "Current row: 245"
## [1] "Current row: 246"
## [1] "Current row: 247"
## [1] "Current row: 248"
## [1] "Current row: 249"
## [1] "Current row: 250"
## [1] "Current row: 251"
## [1] "Current row: 252"
## [1] "Current row: 253"
## [1] "Current row: 254"
## [1] "Current row: 255"
## [1] "Current row: 256"
## [1] "Current row: 257"
## [1] "Current row: 258"
## [1] "Current row: 259"
## [1] "Current row: 260"
## [1] "Current row: 261"
## [1] "Current row: 262"
## [1] "Current row: 263"
## [1] "Current row: 264"
## [1] "Current row: 265"
## [1] "Current row: 266"
## [1] "Current row: 267"
## [1] "Current row: 268"
## [1] "Current row: 269"
## [1] "Current row: 270"
## [1] "Current row: 271"
## [1] "Current row: 272"
## [1] "Current row: 273"
## [1] "Current row: 274"
## [1] "Current row: 275"
## [1] "Current row: 276"
## [1] "Current row: 277"
## [1] "Current row: 278"
## [1] "Current row: 279"
## [1] "Current row: 280"
## [1] "Current row: 281"
## [1] "Current row: 282"
## [1] "Current row: 283"
## [1] "Current row: 284"
## [1] "Current row: 285"
## [1] "Current row: 286"
## [1] "Current row: 287"
## [1] "Current row: 288"
## [1] "Current row: 289"
## [1] "Current row: 290"
## [1] "Current row: 291"
## [1] "Current row: 292"
## [1] "Current row: 293"
## [1] "Current row: 294"
## [1] "Current row: 295"
## [1] "Current row: 296"
## [1] "Current row: 297"
## [1] "Current row: 298"
## [1] "Current row: 299"
## [1] "Current row: 300"
## [1] "Current row: 301"
## [1] "Current row: 302"
## [1] "Current row: 303"
## [1] "Current row: 304"
## [1] "Current row: 305"
## [1] "Current row: 306"
## [1] "Current row: 307"
## [1] "Current row: 308"
## [1] "Current row: 309"
## [1] "Current row: 310"
## [1] "Current row: 311"
## [1] "Current row: 312"
## [1] "Current row: 313"
## [1] "Current row: 314"
## [1] "Current row: 315"
## [1] "Current row: 316"
## [1] "Current row: 317"
## [1] "Current row: 318"
## [1] "Current row: 319"
## [1] "Current row: 320"
## [1] "Current row: 321"
## [1] "Current row: 322"
## [1] "Current row: 323"
## [1] "Current row: 324"
## [1] "Current row: 325"
## [1] "Current row: 326"
## [1] "Current row: 327"
## [1] "Current row: 328"
## [1] "Current row: 329"
## [1] "Current row: 330"
## [1] "Current row: 331"
## [1] "Current row: 332"
## [1] "Current row: 333"
## [1] "Current row: 334"
## [1] "Current row: 335"
## [1] "Current row: 336"
## [1] "Current row: 337"
## [1] "Current row: 338"
## [1] "Current row: 339"
## [1] "Current row: 340"
## [1] "Current row: 341"
## [1] "Current row: 342"
## [1] "Current row: 343"
## [1] "Current row: 344"
## [1] "Current row: 345"
## [1] "Current row: 346"
## [1] "Current row: 347"
## [1] "Current row: 348"
## [1] "Current row: 349"
#going by 1s instead of 10s was the only way I knew how to fix this.
#now we collapse it into a single df
yelp_bike_list2 <- yelp_bike_list %>% bind_rows() %>% as_tibble()
#let's take a look
yelp_bike_list2 %>% print(width=1000)
## # A tibble: 209 × 16
## id alias
## <chr> <chr>
## 1 FK7-M9BGyCgpEmVifcPfoA aztec-cycles-stone-mountain
## 2 FK7-M9BGyCgpEmVifcPfoA aztec-cycles-stone-mountain
## 3 FK7-M9BGyCgpEmVifcPfoA aztec-cycles-stone-mountain
## 4 rbf8bVY0cuqyGZtbn691lg pedego-electric-bikes-atlanta-atlanta-2
## 5 FK7-M9BGyCgpEmVifcPfoA aztec-cycles-stone-mountain
## 6 FK7-M9BGyCgpEmVifcPfoA aztec-cycles-stone-mountain
## 7 FK7-M9BGyCgpEmVifcPfoA aztec-cycles-stone-mountain
## 8 FK7-M9BGyCgpEmVifcPfoA aztec-cycles-stone-mountain
## 9 FK7-M9BGyCgpEmVifcPfoA aztec-cycles-stone-mountain
## 10 FK7-M9BGyCgpEmVifcPfoA aztec-cycles-stone-mountain
## name
## <chr>
## 1 Aztec Cycles
## 2 Aztec Cycles
## 3 Aztec Cycles
## 4 Pedego Electric Bikes Atlanta
## 5 Aztec Cycles
## 6 Aztec Cycles
## 7 Aztec Cycles
## 8 Aztec Cycles
## 9 Aztec Cycles
## 10 Aztec Cycles
## image_url
## <chr>
## 1 https://s3-media3.fl.yelpcdn.com/bphoto/re-aoEuun-QS1SE7dQCySQ/o.jpg
## 2 https://s3-media3.fl.yelpcdn.com/bphoto/re-aoEuun-QS1SE7dQCySQ/o.jpg
## 3 https://s3-media3.fl.yelpcdn.com/bphoto/re-aoEuun-QS1SE7dQCySQ/o.jpg
## 4 https://s3-media2.fl.yelpcdn.com/bphoto/Z7KlxC0vcyoxOCSxbH3Rrg/o.jpg
## 5 https://s3-media3.fl.yelpcdn.com/bphoto/re-aoEuun-QS1SE7dQCySQ/o.jpg
## 6 https://s3-media3.fl.yelpcdn.com/bphoto/re-aoEuun-QS1SE7dQCySQ/o.jpg
## 7 https://s3-media3.fl.yelpcdn.com/bphoto/re-aoEuun-QS1SE7dQCySQ/o.jpg
## 8 https://s3-media3.fl.yelpcdn.com/bphoto/re-aoEuun-QS1SE7dQCySQ/o.jpg
## 9 https://s3-media3.fl.yelpcdn.com/bphoto/re-aoEuun-QS1SE7dQCySQ/o.jpg
## 10 https://s3-media3.fl.yelpcdn.com/bphoto/re-aoEuun-QS1SE7dQCySQ/o.jpg
## is_closed
## <lgl>
## 1 FALSE
## 2 FALSE
## 3 FALSE
## 4 FALSE
## 5 FALSE
## 6 FALSE
## 7 FALSE
## 8 FALSE
## 9 FALSE
## 10 FALSE
## url
## <chr>
## 1 https://www.yelp.com/biz/aztec-cycles-stone-mountain?adjust_creative=D_azJCk…
## 2 https://www.yelp.com/biz/aztec-cycles-stone-mountain?adjust_creative=D_azJCk…
## 3 https://www.yelp.com/biz/aztec-cycles-stone-mountain?adjust_creative=D_azJCk…
## 4 https://www.yelp.com/biz/pedego-electric-bikes-atlanta-atlanta-2?adjust_crea…
## 5 https://www.yelp.com/biz/aztec-cycles-stone-mountain?adjust_creative=D_azJCk…
## 6 https://www.yelp.com/biz/aztec-cycles-stone-mountain?adjust_creative=D_azJCk…
## 7 https://www.yelp.com/biz/aztec-cycles-stone-mountain?adjust_creative=D_azJCk…
## 8 https://www.yelp.com/biz/aztec-cycles-stone-mountain?adjust_creative=D_azJCk…
## 9 https://www.yelp.com/biz/aztec-cycles-stone-mountain?adjust_creative=D_azJCk…
## 10 https://www.yelp.com/biz/aztec-cycles-stone-mountain?adjust_creative=D_azJCk…
## review_count categories rating coordinates$latitude $longitude transactions
## <int> <list> <dbl> <dbl> <dbl> <list>
## 1 54 <df [3 × 2]> 5 33.8 -84.2 <list [0]>
## 2 54 <df [3 × 2]> 5 33.8 -84.2 <list [0]>
## 3 54 <df [3 × 2]> 5 33.8 -84.2 <list [0]>
## 4 9 <df [3 × 2]> 4.5 33.7 -84.4 <list [0]>
## 5 54 <df [3 × 2]> 5 33.8 -84.2 <list [0]>
## 6 54 <df [3 × 2]> 5 33.8 -84.2 <list [0]>
## 7 54 <df [3 × 2]> 5 33.8 -84.2 <list [0]>
## 8 54 <df [3 × 2]> 5 33.8 -84.2 <list [0]>
## 9 54 <df [3 × 2]> 5 33.8 -84.2 <list [0]>
## 10 54 <df [3 × 2]> 5 33.8 -84.2 <list [0]>
## price location$address1 $address2 $address3 $city $zip_code
## <chr> <chr> <chr> <chr> <chr> <chr>
## 1 $$ 901 Main St "" "" Stone Mountain 30083
## 2 $$ 901 Main St "" "" Stone Mountain 30083
## 3 $$ 901 Main St "" "" Stone Mountain 30083
## 4 <NA> 414 Bill Kennedy Way "Ste 101" <NA> Atlanta 30316
## 5 $$ 901 Main St "" "" Stone Mountain 30083
## 6 $$ 901 Main St "" "" Stone Mountain 30083
## 7 $$ 901 Main St "" "" Stone Mountain 30083
## 8 $$ 901 Main St "" "" Stone Mountain 30083
## 9 $$ 901 Main St "" "" Stone Mountain 30083
## 10 $$ 901 Main St "" "" Stone Mountain 30083
## $country $state $display_address phone display_phone distance
## <chr> <chr> <list> <chr> <chr> <dbl>
## 1 US GA <chr [2]> +16786369043 (678) 636-9043 16218.
## 2 US GA <chr [2]> +16786369043 (678) 636-9043 13372.
## 3 US GA <chr [2]> +16786369043 (678) 636-9043 7337.
## 4 US GA <chr [3]> +14049753915 (404) 975-3915 732.
## 5 US GA <chr [2]> +16786369043 (678) 636-9043 29681.
## 6 US GA <chr [2]> +16786369043 (678) 636-9043 23029.
## 7 US GA <chr [2]> +16786369043 (678) 636-9043 31208.
## 8 US GA <chr [2]> +16786369043 (678) 636-9043 29587.
## 9 US GA <chr [2]> +16786369043 (678) 636-9043 24604.
## 10 US GA <chr [2]> +16786369043 (678) 636-9043 25503.
## # … with 199 more rows
## # ℹ Use `print(n = ...)` to see more rows
#Step Five: Cleaning the Data
#The first step is to delete duplicated rows.
bike_yelp_unique <- yelp_bike_list2 %>%
distinct(id, .keep_all=T)
glue::glue("Before dropping NA, there were {nrow(yelp_bike_list2)} rows. After dropping them, there are {nrow(bike_yelp_unique)} rows") %>%
print()
## Before dropping NA, there were 209 rows. After dropping them, there are 16 rows
#I could have waited to do the census data here as well, but I didn't want to. All those duplicate objects would have meant that it would take forever for my loop to run when I was applying my function.
#Next, I'll flatten nested columns that have multiple variables in one column.
concate_list <- function(x){
# x is a data frame with columns "alias" and "title" from Yelp$categories
# returns a character vector containing category concatenated titles
titles <- x[["title"]] %>% str_c(collapse = ", ")
return(titles)
}
bike_yelp_flat <- bike_yelp_unique %>%
#this line of code is flattening columns with data frame
jsonlite::flatten() %>%
relocate(coordinates.latitude, coordinates.longitude, .after = rating) %>%
relocate(location.address1, location.address2, location.address3, location.city,
location.zip_code, location.country, location.state, location.display_address, .after = price) %>%
as_tibble() %>%
# Flatten list-columns ("transactions" (empty), "location.display_address", and "categories")
mutate(transactions = transactions %>% map_chr(function(x) str_c(x, collapse = ", ")),
location.display_address = location.display_address %>% map_chr(function(x) str_c(x, collapse = ", ")),
categories = categories %>% map_chr(concate_list))
bike_yelp_flat %>%
map_dbl(., function(x) sum(is.na(x)))
## id alias name
## 0 0 0
## image_url is_closed url
## 0 0 0
## review_count categories rating
## 0 0 0
## coordinates.latitude coordinates.longitude transactions
## 0 0 0
## price location.address1 location.address2
## 9 1 5
## location.address3 location.city location.zip_code
## 7 0 0
## location.country location.state location.display_address
## 0 0 0
## phone display_phone distance
## 0 0 0
#Lucky for us, none of the variables that we're interested in are null. We won't need to drop any NAs in that case.
#The last step is to delete rows that fall outside of my boundary.
#Here's our census boundary
FD_census <- st_read("https://raw.githubusercontent.com/BonwooKoo/UrbanAnalytics2022/main/Lab/module_0/testdata.geojson")
## Reading layer `testdata' from data source
## `https://raw.githubusercontent.com/BonwooKoo/UrbanAnalytics2022/main/Lab/module_0/testdata.geojson'
## using driver `GeoJSON'
## Simple feature collection with 519 features and 8 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: -84.85071 ymin: 33.35246 xmax: -84.02371 ymax: 34.18629
## Geodetic CRS: WGS 84
#The below code is converting bike_yelp_flat into a sf object
yelp_sf <- bike_yelp_flat %>%
st_as_sf(coords=c("coordinates.longitude", "coordinates.latitude"), crs = 4326)
# sf subsets
yelp_bikes_in <- yelp_sf[FD_tracts19_2 %>% st_transform(crs=4326) %>%
st_union(), ,op = st_intersects]
#Finally, let's compare our datasets
glue::glue("nrow before: {nrow(yelp_bike_list2)} -> nrow after: {nrow(yelp_bikes_in)} \n
ncol before: {ncol(yelp_bike_list2)} -> ncol after: {ncol(yelp_bikes_in)} \n") %>%
print()
## nrow before: 209 -> nrow after: 14
##
## ncol before: 16 -> ncol after: 23
#Step Six: Exploring the Data
Let’s see if we can’t have a deeper understanding of our data by running some basic analyses. First, we’ll skim the data.
if (!require("skimr")) install.packages("skimr")
## Loading required package: skimr
library(skimr)
skim(yelp_bikes_in)
## Warning: Couldn't find skimmers for class: sfc_POINT, sfc; No user-defined `sfl`
## provided. Falling back to `character`.
| Name | yelp_bikes_in |
| Number of rows | 14 |
| Number of columns | 23 |
| _______________________ | |
| Column type frequency: | |
| character | 19 |
| logical | 1 |
| numeric | 3 |
| ________________________ | |
| Group variables | None |
Variable type: character
| skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
|---|---|---|---|---|---|---|---|
| id | 0 | 1.00 | 22 | 22 | 0 | 14 | 0 |
| alias | 0 | 1.00 | 14 | 43 | 0 | 14 | 0 |
| name | 0 | 1.00 | 4 | 32 | 0 | 14 | 0 |
| image_url | 0 | 1.00 | 68 | 68 | 0 | 14 | 0 |
| url | 0 | 1.00 | 171 | 200 | 0 | 14 | 0 |
| categories | 0 | 1.00 | 19 | 52 | 0 | 12 | 0 |
| transactions | 0 | 1.00 | 0 | 0 | 14 | 1 | 0 |
| price | 8 | 0.43 | 1 | 4 | 0 | 4 | 0 |
| location.address1 | 1 | 0.93 | 0 | 24 | 3 | 11 | 0 |
| location.address2 | 4 | 0.71 | 0 | 9 | 7 | 4 | 0 |
| location.address3 | 6 | 0.57 | 0 | 0 | 8 | 1 | 0 |
| location.city | 0 | 1.00 | 7 | 14 | 0 | 5 | 0 |
| location.zip_code | 0 | 1.00 | 5 | 5 | 0 | 11 | 0 |
| location.country | 0 | 1.00 | 2 | 2 | 0 | 1 | 0 |
| location.state | 0 | 1.00 | 2 | 2 | 0 | 1 | 0 |
| location.display_address | 0 | 1.00 | 17 | 50 | 0 | 14 | 0 |
| phone | 0 | 1.00 | 12 | 12 | 0 | 14 | 0 |
| display_phone | 0 | 1.00 | 14 | 14 | 0 | 14 | 0 |
| geometry | 0 | 1.00 | 21 | 38 | 0 | 14 | 0 |
Variable type: logical
| skim_variable | n_missing | complete_rate | mean | count |
|---|---|---|---|---|
| is_closed | 0 | 1 | 0 | FAL: 14 |
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| review_count | 0 | 1 | 29.50 | 36.61 | 1.00 | 3.25 | 10.50 | 50.50 | 124.00 | ▇▁▂▁▁ |
| rating | 0 | 1 | 3.64 | 1.61 | 1.00 | 2.50 | 4.50 | 4.50 | 5.00 | ▂▁▁▁▇ |
| distance | 0 | 1 | 3310.91 | 4403.07 | 230.64 | 773.74 | 1847.05 | 3048.27 | 16217.92 | ▇▁▁▁▁ |
#Now let's take a look at it
tm_shape(yelp_bikes_in) + tm_dots(col="yellow") + tm_shape(FD_tracts19_2) + tm_borders()
#let's check coordinate systems
head(yelp_bikes_in)
## Simple feature collection with 6 features and 22 fields
## Geometry type: POINT
## Dimension: XY
## Bounding box: xmin: -84.36515 ymin: 33.74172 xmax: -84.16982 ymax: 34.05956
## Geodetic CRS: WGS 84
## # A tibble: 6 × 23
## id alias name image…¹ is_cl…² url revie…³ categ…⁴ rating trans…⁵ price
## <chr> <chr> <chr> <chr> <lgl> <chr> <int> <chr> <dbl> <chr> <chr>
## 1 FK7-M9… azte… Azte… https:… FALSE http… 54 Bikes,… 5 "" $$
## 2 rbf8bV… pede… Pede… https:… FALSE http… 9 Bikes,… 4.5 "" <NA>
## 3 OJVvH1… rosw… Rosw… https:… FALSE http… 70 Bikes,… 4 "" $$$
## 4 cRTM5f… the-… The … https:… FALSE http… 1 Bike R… 1 "" $$$$
## 5 BozJwf… podi… Podi… https:… FALSE http… 19 Bikes,… 4.5 "" $$$
## 6 b3nacM… atla… Atla… https:… FALSE http… 124 Bike R… 4.5 "" $$
## # … with 12 more variables: location.address1 <chr>, location.address2 <chr>,
## # location.address3 <chr>, location.city <chr>, location.zip_code <chr>,
## # location.country <chr>, location.state <chr>,
## # location.display_address <chr>, phone <chr>, display_phone <chr>,
## # distance <dbl>, geometry <POINT [°]>, and abbreviated variable names
## # ¹image_url, ²is_closed, ³review_count, ⁴categories, ⁵transactions
## # ℹ Use `colnames()` to see all variable names
head(FD_tracts19_2)
## Simple feature collection with 6 features and 16 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: -84.3492 ymin: 33.7472 xmax: -84.213 ymax: 33.91808
## Geodetic CRS: NAD83
## GEOID.x hhincome14 population14 trans.bicycle14 trans.walk14
## 1 13089020500 45760 2922 30 19
## 2 13089021208 58935 4155 0 21
## 3 13089021415 80913 4020 0 75
## 4 13089021814 33520 5945 0 57
## 5 13089022700 92000 4517 20 82
## 6 13089023111 50223 1935 0 0
## ptrans.bicycle14 ptrans.walk14 GEOID.y hhincome19 population19
## 1 0.010266940 0.006502396 13089020500 65694 3161
## 2 0.000000000 0.005054152 13089021301 46667 3817
## 3 0.000000000 0.018656716 13121009404 81004 4484
## 4 0.000000000 0.009587889 13089021809 99337 6785
## 5 0.004427718 0.018153642 13089020802 73981 4411
## 6 0.000000000 0.000000000 13089023114 51105 2901
## trans.bicycle19 trans.walk19 ptrans.bicycle19 ptrans.walk19 trans.bicyclechg
## 1 23 41 0.007276178 0.012970579 -7
## 2 0 38 0.000000000 0.009955462 0
## 3 0 15 0.000000000 0.003345227 0
## 4 0 56 0.000000000 0.008253500 0
## 5 32 49 0.007254591 0.011108592 12
## 6 0 16 0.000000000 0.005515340 0
## trans.walkchg geometry
## 1 22 MULTIPOLYGON (((-84.34919 3...
## 2 17 MULTIPOLYGON (((-84.32248 3...
## 3 -60 MULTIPOLYGON (((-84.34836 3...
## 4 -1 MULTIPOLYGON (((-84.24736 3...
## 5 -33 MULTIPOLYGON (((-84.31249 3...
## 6 16 MULTIPOLYGON (((-84.23339 3...
#the crs is different for both datasets, so let's do some transformation.
FD_tract_geom <- st_transform(FD_tracts19_2, crs=4326)
Yelp_in_geom <- st_transform(yelp_bikes_in, crs=4326)
#yelp already has the correct crs but I wanted to do it again since sometimes there is
bike_in_FDtract <- st_join(FD_tract_geom, Yelp_in_geom, join = st_intersects)
skim(bike_in_FDtract)
## Warning: Couldn't find skimmers for class: sfc_MULTIPOLYGON, sfc; No user-
## defined `sfl` provided. Falling back to `character`.
| Name | bike_in_FDtract |
| Number of rows | 350 |
| Number of columns | 39 |
| _______________________ | |
| Column type frequency: | |
| character | 21 |
| logical | 1 |
| numeric | 17 |
| ________________________ | |
| Group variables | None |
Variable type: character
| skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
|---|---|---|---|---|---|---|---|
| GEOID.x | 0 | 1.00 | 11 | 11 | 0 | 349 | 0 |
| GEOID.y | 0 | 1.00 | 11 | 11 | 0 | 106 | 0 |
| id | 336 | 0.04 | 22 | 22 | 0 | 14 | 0 |
| alias | 336 | 0.04 | 14 | 43 | 0 | 14 | 0 |
| name | 336 | 0.04 | 4 | 32 | 0 | 14 | 0 |
| image_url | 336 | 0.04 | 68 | 68 | 0 | 14 | 0 |
| url | 336 | 0.04 | 171 | 200 | 0 | 14 | 0 |
| categories | 336 | 0.04 | 19 | 52 | 0 | 12 | 0 |
| transactions | 336 | 0.04 | 0 | 0 | 14 | 1 | 0 |
| price | 344 | 0.02 | 1 | 4 | 0 | 4 | 0 |
| location.address1 | 337 | 0.04 | 0 | 24 | 3 | 11 | 0 |
| location.address2 | 340 | 0.03 | 0 | 9 | 7 | 4 | 0 |
| location.address3 | 342 | 0.02 | 0 | 0 | 8 | 1 | 0 |
| location.city | 336 | 0.04 | 7 | 14 | 0 | 5 | 0 |
| location.zip_code | 336 | 0.04 | 5 | 5 | 0 | 11 | 0 |
| location.country | 336 | 0.04 | 2 | 2 | 0 | 1 | 0 |
| location.state | 336 | 0.04 | 2 | 2 | 0 | 1 | 0 |
| location.display_address | 336 | 0.04 | 17 | 50 | 0 | 14 | 0 |
| phone | 336 | 0.04 | 12 | 12 | 0 | 14 | 0 |
| display_phone | 336 | 0.04 | 14 | 14 | 0 | 14 | 0 |
| geometry | 0 | 1.00 | 174 | 3619 | 0 | 349 | 0 |
Variable type: logical
| skim_variable | n_missing | complete_rate | mean | count |
|---|---|---|---|---|
| is_closed | 336 | 0.04 | 0 | FAL: 14 |
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| hhincome14 | 5 | 0.99 | 58938.50 | 34513.34 | 2499.00 | 33361.00 | 50223.00 | 72995.00 | 176615.00 | ▆▇▃▁▁ |
| population14 | 0 | 1.00 | 4803.97 | 2577.39 | 0.00 | 3023.25 | 4482.50 | 5872.25 | 17948.00 | ▅▇▂▁▁ |
| trans.bicycle14 | 0 | 1.00 | 6.92 | 19.57 | 0.00 | 0.00 | 0.00 | 0.00 | 172.00 | ▇▁▁▁▁ |
| trans.walk14 | 0 | 1.00 | 52.36 | 103.10 | 0.00 | 0.00 | 22.00 | 59.00 | 923.00 | ▇▁▁▁▁ |
| ptrans.bicycle14 | 2 | 0.99 | 0.00 | 0.01 | 0.00 | 0.00 | 0.00 | 0.00 | 0.04 | ▇▁▁▁▁ |
| ptrans.walk14 | 2 | 0.99 | 0.01 | 0.02 | 0.00 | 0.00 | 0.00 | 0.01 | 0.23 | ▇▁▁▁▁ |
| hhincome19 | 3 | 0.99 | 72645.04 | 40547.97 | 18913.00 | 44234.00 | 62656.00 | 85312.00 | 236149.00 | ▇▆▂▁▁ |
| population19 | 0 | 1.00 | 5243.85 | 2806.42 | 196.00 | 3237.00 | 4830.00 | 6745.00 | 15411.00 | ▅▇▅▁▁ |
| trans.bicycle19 | 0 | 1.00 | 12.21 | 21.05 | 0.00 | 0.00 | 0.00 | 19.00 | 104.00 | ▇▁▁▁▁ |
| trans.walk19 | 0 | 1.00 | 65.82 | 130.97 | 0.00 | 8.00 | 24.50 | 65.00 | 712.00 | ▇▁▁▁▁ |
| ptrans.bicycle19 | 0 | 1.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.02 | ▇▂▁▁▁ |
| ptrans.walk19 | 0 | 1.00 | 0.01 | 0.02 | 0.00 | 0.00 | 0.01 | 0.01 | 0.12 | ▇▁▁▁▁ |
| trans.bicyclechg | 0 | 1.00 | 5.29 | 21.82 | -138.00 | 0.00 | 0.00 | 11.75 | 104.00 | ▁▁▇▃▁ |
| trans.walkchg | 0 | 1.00 | 13.46 | 115.29 | -681.00 | -19.75 | 5.00 | 34.75 | 710.00 | ▁▁▇▁▁ |
| review_count | 336 | 0.04 | 29.50 | 36.61 | 1.00 | 3.25 | 10.50 | 50.50 | 124.00 | ▇▁▂▁▁ |
| rating | 336 | 0.04 | 3.64 | 1.61 | 1.00 | 2.50 | 4.50 | 4.50 | 5.00 | ▂▁▁▁▇ |
| distance | 336 | 0.04 | 3310.91 | 4403.07 | 230.64 | 773.74 | 1847.05 | 3048.27 | 16217.92 | ▇▁▁▁▁ |
#Some of these missing values might be because there are tracts like on our last analysis where no one lives there, so there's no data. Let's visualize it first an see what we're working with. We can always do a second round of scrubbing if need be.
#Step Seven: Visualizations
Let’s start off by asking ourselves some questions. The first thing I want to know is: where are there no bike rental places where there is a high proportion of cyclists relative to the population?
tmap_mode("view")
## tmap mode set to interactive viewing
tm_shape(FD_tract_geom) + tm_polygons(col = "ptrans.bicycle19", title= 'Proportion of Cyclists (2019)', palette= "BuGn") +
tm_shape(Yelp_in_geom) + tm_dots(id = "name")
Hey, looks like we’ve got some hotspots where there is a high proportion of cyclists in the metro area! How about pedestrians?
tm_shape(FD_tract_geom) + tm_polygons(col = "ptrans.walk19", title= 'Proportion of Pedestrians (2019)', palette= "BuPu") +
tm_shape(Yelp_in_geom) + tm_dots(id = "name")
Unsurprisingly, where there are lots of cyclists, there are also lots of pedestrians!
Where are there no bike rental places where we have seen an increase in the number of cyclists?
tm_shape(FD_tract_geom) + tm_polygons(col = "trans.bicyclechg", title= 'Change in number of Cyclists (2014-2019)', palette= "BuGn") +
tm_shape(Yelp_in_geom) + tm_dots(id = "name")
Some areas might have decreased, but some places have increased as well–and pretty substantially! This doesn’t tell us much because it’s just a flat number, but once we dig deeper into what’s driving the increase, it might tell us more.
Either way, any of the census tracts with higher numbers for all three variables, but no bike rental shop would be ideal perspective locations.
Since my rental place is just starting out, I might want to set up in a place where the competition isn’t so stiff. I think place with a high number of reviews and a high rating indicates a valued community staple, so I want to see if there are any popular shops in tracts with a high proportion of cyclists so I can have an easier time establishing a niche.
popular_shops <- Yelp_in_geom %>%
arrange(desc(review_count)) %>%
slice(1:4)
ggplot(data = bike_in_FDtract,
aes(x=review_count, y=rating)) +
geom_point(mapping = aes(color = ptrans.bicycle19)) +
geom_point(data = popular_shops, size = 3, shape = 1, color = "black") +
ggrepel::geom_label_repel(data = popular_shops, mapping = aes(label = name)) +
labs(x = "Review Count in Yelp",
y = "Rating in Yelp",
color = "Proportion of Cyclists",
title = "What are the More Popular Rental Shops?") +
scale_color_gradient(low="darkblue", high="yellow") +
theme_light()
## Warning: Removed 336 rows containing missing values (geom_point).
Looks like these areas all have relatively low proportions of cyclists.
I’ll make sure to double check these stores against my potential census
tracts later in my analysis.