Census API
tidycensus::census_api_key(Sys.getenv("census_api"))
## To install your API key for use in future sessions, run this function with `install = TRUE`.
Library packages
Census Tracts, Cherokee County, Canton City
#### Tract polygons
tract <- suppressMessages(
get_acs(geography = "tract",
state = "GA",
county = c("Cherokee"),
variables = c(hhincome = 'B19019_001'),
year = 2021,
survey = "acs5",
geometry = TRUE,
output = "wide")
)
canton <- tigris::places('GA') %>%
filter(NAME == 'Canton')
## Retrieving data for the year 2022
tract_canton <- tract[canton,]
View the data
message(sprintf("nrow: %s, ncol: %s", nrow(tract_canton), ncol(tract_canton)))
## nrow: 14, ncol: 5
tract_canton %>% head() %>% knitr::kable()
| GEOID | NAME | hhincomeE | hhincomeM | geometry | |
|---|---|---|---|---|---|
| 2 | 13057090707 | Census Tract 907.07, Cherokee County, Georgia | 85684 | 15839 | MULTIPOLYGON (((-84.52836 3… |
| 10 | 13057090703 | Census Tract 907.03, Cherokee County, Georgia | 43647 | 13022 | MULTIPOLYGON (((-84.51658 3… |
| 16 | 13057090301 | Census Tract 903.01, Cherokee County, Georgia | 126250 | 32291 | MULTIPOLYGON (((-84.59071 3… |
| 17 | 13057090603 | Census Tract 906.03, Cherokee County, Georgia | 86563 | 23368 | MULTIPOLYGON (((-84.48169 3… |
| 20 | 13057090102 | Census Tract 901.02, Cherokee County, Georgia | 75840 | 9309 | MULTIPOLYGON (((-84.49538 3… |
| 24 | 13057090710 | Census Tract 907.10, Cherokee County, Georgia | 100893 | 5307 | MULTIPOLYGON (((-84.55758 3… |
Map of Canton City
tract_canton <- tract_canton %>%
select(GEOID,
hhincome = hhincomeE) # New name = old name
tmap_mode("view")
## tmap mode set to interactive viewing
tm_shape(tract_canton) + tm_borders(lwd = 2) +
tm_shape(canton) + tm_polygons(col = 'red', alpha = 0.4)
# Function: Get tract-wise radius
get_r <- function(poly, epsg_id){
#---------------------
# Takes: a single POLYGON or LINESTRTING
# Outputs: distance between the centroid of the boundingbox and a corner of the bounding box
#---------------------
# Get bounding box of a given polygon
bb <- st_bbox(poly)
# Get lat & long coordinates of any one corner of the bounding box.
bb_corner <- st_point(c(bb[1], bb[2])) %>% st_sfc(crs = epsg_id)
# Get centroid of the bb
bb_center_x <- (bb[3]+bb[1])/2
bb_center_y <- (bb[4]+bb[2])/2
bb_center <- st_point(c(bb_center_x, bb_center_y)) %>% st_sfc(crs = epsg_id) %>% st_sf()
# Get the distance between bb_p and c
r <- st_distance(bb_corner, bb_center)
# Multiply 1.1 to make the circle a bit larger than the Census Tract.
# See the Yelp explanation of their radius parameter to see why we do this.
bb_center$radius <- r*1.1
return(bb_center)
}
## Using a loop -----------------------------------------------------------------
# Creating an empty vector of NA.
# Results will fill this vector
epsg_id <- 4326
r4all_loop <- vector("list", nrow(tract_canton))
# Starting a for-loop
for (i in 1:nrow(tract_canton)){
r4all_loop[[i]] <- tract_canton %>%
st_transform(crs = epsg_id) %>%
st_geometry() %>%
.[[i]] %>%
get_r(epsg_id = epsg_id)
}
r4all_loop <- bind_rows(r4all_loop)
# Using a functional -----------------------------------------------------------
# We use a functional (sapply) to apply this custom function to each Census Tract.
r4all_apply <- tract_canton %>%
st_geometry() %>%
st_transform(crs = epsg_id) %>%
lapply(., function(x) get_r(x, epsg_id = epsg_id))
r4all_apply <- bind_rows(r4all_apply)
# Are these two identical?
identical(r4all_apply, r4all_loop)
## [1] TRUE
# Appending X Y coordinates as seprate columns
ready_4_yelp <- r4all_apply %>%
mutate(x = st_coordinates(.)[,1],
y = st_coordinates(.)[,2])
Map of Census Tracts and Buffers in Canton City
tmap_mode('view')
## tmap mode set to interactive viewing
ready_4_yelp %>%
# Draw a buffer centered at the centroid of Tract polygons.
# Radius of the buffer is the radius we just calculated using loop
st_buffer(., dist = .$radius) %>%
# Display this buffer in red
tm_shape(.) + tm_polygons(alpha = 0.5, col = 'red') +
# Display the original polygon in blue
tm_shape(tract_canton) + tm_borders(col= 'blue')
Yelp API, Data Access: Categories: “Hotels” and “Car Rental”
# FUNCTION
get_yelp <- function(tract, category){
# ----------------------------------
# Gets one row of tract information (1,) and category name (str),
# Outputs a list of business data.frame
Sys.sleep(1)
n <- 1
# First request --------------------------------------------------------------
resp <- business_search(api_key = Sys.getenv("yelp_api"),
categories = category,
latitude = tract$y,
longitude = tract$x,
offset = (n - 1) * 50, # = 0 when n = 1
radius = round(tract$radius),
limit = 50)
# Calculate how many requests are needed in total
required_n <- ceiling(resp$total/50)
# out is where the results will be appended to.
out <- vector("list", required_n)
# Store the business information to nth slot in out
out[[n]] <- resp$businesses
# Change the name of the elements to the total required_n
# This is to know if there are more than 1000 businesses,
# we know how many.
names(out)[n] <- required_n
# Throw error if more than 1000
if (resp$total >= 1000)
{
# glue formats string by inserting {n} with what's currently stored in object n.
print(glue::glue("{n}th row has >= 1000 businesses."))
# Stop before going into the loop because we need to
# break down Census Tract to something smaller.
return(out)
}
else
{
# add 1 to n
n <- n + 1
# Now we know required_n -----------------------------------------------------
# Starting a loop
while(n <= required_n){
resp <- business_search(api_key = Sys.getenv("yelp_api"),
categories = category,
latitude = tract$y,
longitude = tract$x,
offset = (n - 1) * 50,
radius = round(tract$radius),
limit = 50)
out[[n]] <- resp$businesses
n <- n + 1
} #<< end of while loop
# Merge all elements in the list into a single data frame
out <- out %>% bind_rows()
return(out)
}
}
# Prepare a collector
yelp_all_list <- vector("list", nrow(ready_4_yelp))
# Looping through all Census Tracts
for (row in 1:nrow(ready_4_yelp)){
yelp_hotel <- suppressMessages(get_yelp(ready_4_yelp[row, ], "hotels"))
yelp_car_rental <- suppressMessages(get_yelp(ready_4_yelp[row, ], "carrental"))
yelp_all_list[[row]] <- bind_rows(yelp_hotel, yelp_car_rental)
print(paste0("Current row: ", row))
}
## [1] "Current row: 1"
## [1] "Current row: 2"
## [1] "Current row: 3"
## [1] "Current row: 4"
## [1] "Current row: 5"
## [1] "Current row: 6"
## [1] "Current row: 7"
## [1] "Current row: 8"
## [1] "Current row: 9"
## [1] "Current row: 10"
## [1] "Current row: 11"
## [1] "Current row: 12"
## [1] "Current row: 13"
## [1] "Current row: 14"
# Collapsing the list into a data.frame
yelp_all <- yelp_all_list %>% bind_rows() %>% as_tibble()
# print
yelp_all %>% print(width=1000)
## # A tibble: 110 × 18
## id alias
## <chr> <chr>
## 1 YgaZ6LTfx8pbSNgGcvZQyg pine-crest-motor-lodge-canton
## 2 s_xSIKvx0mwuRuHELcYVMg mike-the-mechanic-woodstock
## 3 jVeS7LfHJs5D7Ehas1QFHQ classic-convertible-carriages-alpharetta
## 4 YgaZ6LTfx8pbSNgGcvZQyg pine-crest-motor-lodge-canton
## 5 eFyH0aX9MU4hW3Zmi5Cpkg kaelani-kleans-llc-woodstock
## 6 s_xSIKvx0mwuRuHELcYVMg mike-the-mechanic-woodstock
## 7 sNfuHJecsGcVE4b1b2MZ7A noir-luxury-auto-atlanta
## 8 UtpCPt85BKJ1q9oOk1sNxQ ahc-car-rentals-east-point-2
## 9 0J3S8IowlImhKTcRFoDzdg pick-your-leisure-sandy-springs
## 10 jVeS7LfHJs5D7Ehas1QFHQ classic-convertible-carriages-alpharetta
## name
## <chr>
## 1 Pine Crest Motor Lodge
## 2 Mike the Mechanic
## 3 Classic Convertible Carriages
## 4 Pine Crest Motor Lodge
## 5 Kaelani Kleans LLC
## 6 Mike the Mechanic
## 7 Noir Luxury Auto
## 8 AHC Car Rentals
## 9 Pick Your Leisure
## 10 Classic Convertible Carriages
## image_url
## <chr>
## 1 ""
## 2 "https://s3-media4.fl.yelpcdn.com/bphoto/7PYDPexb4FqWAbRdhiiulQ/o.jpg"
## 3 "https://s3-media2.fl.yelpcdn.com/bphoto/O187Bzhd6ww2dcme1c4uSA/o.jpg"
## 4 ""
## 5 "https://s3-media4.fl.yelpcdn.com/bphoto/nqT2OSocOt3EHnPlcc9a-Q/o.jpg"
## 6 "https://s3-media4.fl.yelpcdn.com/bphoto/7PYDPexb4FqWAbRdhiiulQ/o.jpg"
## 7 "https://s3-media3.fl.yelpcdn.com/bphoto/JO0kg3_LZccFFbF2oaYnvA/o.jpg"
## 8 "https://s3-media4.fl.yelpcdn.com/bphoto/jM5RJ0dwK4GQiBhMA7FiSw/o.jpg"
## 9 "https://s3-media2.fl.yelpcdn.com/bphoto/vBx2VOsApyrjCOhTJ_4ohw/o.jpg"
## 10 "https://s3-media2.fl.yelpcdn.com/bphoto/O187Bzhd6ww2dcme1c4uSA/o.jpg"
## is_closed
## <lgl>
## 1 FALSE
## 2 FALSE
## 3 FALSE
## 4 FALSE
## 5 FALSE
## 6 FALSE
## 7 FALSE
## 8 FALSE
## 9 FALSE
## 10 FALSE
## url
## <chr>
## 1 https://www.yelp.com/biz/pine-crest-motor-lodge-canton?adjust_creative=yhocq…
## 2 https://www.yelp.com/biz/mike-the-mechanic-woodstock?adjust_creative=yhocquU…
## 3 https://www.yelp.com/biz/classic-convertible-carriages-alpharetta?adjust_cre…
## 4 https://www.yelp.com/biz/pine-crest-motor-lodge-canton?adjust_creative=yhocq…
## 5 https://www.yelp.com/biz/kaelani-kleans-llc-woodstock?adjust_creative=yhocqu…
## 6 https://www.yelp.com/biz/mike-the-mechanic-woodstock?adjust_creative=yhocquU…
## 7 https://www.yelp.com/biz/noir-luxury-auto-atlanta?adjust_creative=yhocquUnsS…
## 8 https://www.yelp.com/biz/ahc-car-rentals-east-point-2?adjust_creative=yhocqu…
## 9 https://www.yelp.com/biz/pick-your-leisure-sandy-springs?adjust_creative=yho…
## 10 https://www.yelp.com/biz/classic-convertible-carriages-alpharetta?adjust_cre…
## review_count categories rating coordinates$latitude $longitude transactions
## <int> <list> <dbl> <dbl> <dbl> <list>
## 1 3 <df [1 × 2]> 1 34.2 -84.5 <list [0]>
## 2 28 <df [3 × 2]> 3.1 34.1 -84.5 <list [0]>
## 3 0 <df [2 × 2]> 0 34.0 -84.2 <list [0]>
## 4 3 <df [1 × 2]> 1 34.2 -84.5 <list [0]>
## 5 0 <df [3 × 2]> 0 34.1 -84.4 <list [0]>
## 6 28 <df [3 × 2]> 3.1 34.1 -84.5 <list [0]>
## 7 1 <df [3 × 2]> 5 33.8 -84.4 <list [0]>
## 8 0 <df [2 × 2]> 0 33.7 -84.4 <list [0]>
## 9 0 <df [3 × 2]> 0 33.9 -84.4 <list [0]>
## 10 0 <df [2 × 2]> 0 34.0 -84.2 <list [0]>
## location$address1 $address2 $address3 $city $zip_code
## <chr> <chr> <chr> <chr> <chr>
## 1 "2330 Holly Springs Pkwy" "" "" "Canton" 30115
## 2 "451 Toonigh Rd" <NA> "" "Woodstock" 30188
## 3 "" <NA> "" "Alpharetta" 30022
## 4 "2330 Holly Springs Pkwy" "" "" "Canton" 30115
## 5 "317 Pioneer Cir" <NA> "" "Woodstock" 30188
## 6 "451 Toonigh Rd" <NA> "" "Woodstock" 30188
## 7 <NA> <NA> "" "Atlanta" 30308
## 8 "2882 Church St" <NA> <NA> "East Point " 30344
## 9 "5 Concourse Pkwy" "Ste 3000" <NA> "Atlanta" 30326
## 10 "" <NA> "" "Alpharetta" 30022
## $country $state $display_address phone display_phone distance
## <chr> <chr> <list> <chr> <chr> <dbl>
## 1 US GA <chr [2]> +17703455521 (770) 345-5521 1392.
## 2 US GA <chr [2]> +17705174133 (770) 517-4133 4614.
## 3 US GA <chr [1]> +16784510092 (678) 451-0092 30783.
## 4 US GA <chr [2]> +17703455521 (770) 345-5521 1481.
## 5 US GA <chr [2]> +14047819563 (404) 781-9563 9870.
## 6 US GA <chr [2]> +17705174133 (770) 517-4133 3292.
## 7 US GA <chr [1]> +14042775478 (404) 277-5478 45508.
## 8 US GA <chr [2]> +18884061886 (888) 406-1886 54970.
## 9 US GA <chr [3]> +14709905236 (470) 990-5236 30767.
## 10 US GA <chr [1]> +16784510092 (678) 451-0092 28008.
## business_hours attributes$business_temp_closed $waitlist_reservation price
## <list> <lgl> <lgl> <chr>
## 1 <df [1 × 3]> NA NA <NA>
## 2 <df [1 × 3]> NA NA <NA>
## 3 <df [1 × 3]> NA NA <NA>
## 4 <df [1 × 3]> NA NA <NA>
## 5 <df [1 × 3]> NA NA <NA>
## 6 <df [1 × 3]> NA NA <NA>
## 7 <df [1 × 3]> NA NA <NA>
## 8 <df [1 × 3]> NA NA <NA>
## 9 <df [1 × 3]> NA NA <NA>
## 10 <df [1 × 3]> NA NA <NA>
## # ℹ 100 more rows
Tidying data
Duplicates
yelp_unique <- yelp_all %>%
distinct(id, .keep_all=T)
glue::glue("Before dropping duplicated rows, there were {nrow(yelp_all)} rows. After dropping them, there are {nrow(yelp_unique)} rows") %>%
print()
## Before dropping duplicated rows, there were 110 rows. After dropping them, there are 24 rows
Multiple variables in one column
concate_list <- function(x){
# x is a data frame with columns "alias" and "title" from Yelp$categories
# returns a character vector containing category concatenated titles
titles <- x[["title"]] %>% str_c(collapse = ", ")
return(titles)
}
yelp_flat <- yelp_unique %>%
# 1. Flattening columns with data frame
jsonlite::flatten() %>%
# 2. Handling list-columns
mutate(transactions = transactions %>%
map_chr(., function(x) str_c(x, collapse=", ")),
location.display_address = location.display_address %>%
map_chr(., function(x) str_c(x, collapse=", ")),
categories = categories %>% map_chr(concate_list))
Missing values
yelp_flat %>%
map_dbl(., function(x) sum(is.na(x)))
## id alias
## 0 0
## name image_url
## 0 0
## is_closed url
## 0 0
## review_count categories
## 0 0
## rating transactions
## 0 0
## phone display_phone
## 0 0
## distance business_hours
## 0 0
## price coordinates.latitude
## 21 0
## coordinates.longitude location.address1
## 0 3
## location.address2 location.address3
## 14 13
## location.city location.zip_code
## 0 0
## location.country location.state
## 0 0
## location.display_address attributes.business_temp_closed
## 0 24
## attributes.waitlist_reservation
## 24
Drop missing
identical(is.na(yelp_flat$coordinates.latitude),
is.na(yelp_flat$coordinates.longitude))
## [1] TRUE
# Drop rows that have missing values in `coordinates.longitude` and 'coordinates.latitude'
yelp_dropna <- yelp_flat %>%
drop_na(coordinates.longitude, coordinates.latitude)
print(paste0("Before: ", nrow(yelp_flat)))
## [1] "Before: 24"
print(paste0("After: ", nrow(yelp_dropna)))
## [1] "After: 24"
Clear points ouside the city boundary
canton <- tigris::places("GA", progress_bar = FALSE) %>%
filter(NAME == 'Canton') %>%
st_transform(4326)
## Retrieving data for the year 2022
yelp_sf <- yelp_dropna %>%
st_as_sf(coords=c("coordinates.longitude", "coordinates.latitude"),
crs = 4326)
# sf subsets
yelp_in <- yelp_sf[canton, ]
print(paste0("Before: ", nrow(yelp_sf)))
## [1] "Before: 24"
print(paste0("After: ", nrow(yelp_in)))
## [1] "After: 15"
yelp_in <- yelp_in %>%
mutate(category_type = case_when(
str_detect(categories, "Car Rental") ~ "Car Rental",
str_detect(categories, "Hotels") ~ "Hotels"
))
Final Map of Hotel and Car Rental locations
tm_shape(yelp_in) +
tm_dots(col = "category_type",
palette = c("Car Rental" = "blue", # Assign blue for Car Rental
"Hotels" = "red", # Assign red for Hotels
"Other" = "gray"), # Optional: Assign gray for other categories
title = "Category Type") +
tm_shape(canton) +
tm_borders()
Spatial Pattern and Findings:
Hotels (red dots): Most of the hotels are located northeast of the city center, concentrated near main roads. This proximity suggests they might be close to tourist attractions or the city’s activity center.
Car Rentals (blue dots): Most of car rental locations are clustered to the southwest of the city center, near minor roads. This pattern may indicate proximity to a transportation hub, such as a bus or train station, where travelers might need easy access to rental services.
Total Number of Businesses
Total:15 Hotels:10 Car Rental:5
total_businesses <- nrow(yelp_in)
print(paste0("Total number of businesses: ", total_businesses))
## [1] "Total number of businesses: 15"
Number of Businesses by Category
business_count_by_category <- yelp_in %>%
group_by(category_type) %>%
summarise(count = n())
print(business_count_by_category)
## Simple feature collection with 2 features and 2 fields
## Geometry type: MULTIPOINT
## Dimension: XY
## Bounding box: xmin: -84.50495 ymin: 34.22241 xmax: -84.4608 ymax: 34.25922
## Geodetic CRS: WGS 84
## # A tibble: 2 × 3
## category_type count geometry
## <chr> <int> <MULTIPOINT [°]>
## 1 Car Rental 5 ((-84.4938 34.224), (-84.48464 34.24507), (-84.49941 34.2…
## 2 Hotels 10 ((-84.48037 34.25348), (-84.46423 34.25514), (-84.46961 3…
# Count occurrences of "Car Rental" or "Hotels" in the concatenated categories
car_rental_count <- yelp_in %>%
filter(str_detect(category_type, "Car Rental")) %>%
count()
hotels_count <- yelp_in %>%
filter(str_detect(category_type, "Hotels")) %>%
count()
# Print the counts
print(paste0("Number of Car Rental businesses: ", car_rental_count$n))
## [1] "Number of Car Rental businesses: 5"
print(paste0("Number of Hotels businesses: ", hotels_count$n))
## [1] "Number of Hotels businesses: 10"