Load libraries and R Data from assignment 1
# Attach the tidyverse packages (dplyr, purrr, stringr, tidyr, ...) whose
# verbs are used unqualified in the chunks below.
library(tidyverse)
# Restore the R objects saved from assignment 1.
load("/home/rstudio/Mod 1.RData")
# Objects the author lists as coming from that workspace:
# yelp_all   (restaurants)
# yelp_all2  (hiking)
# yelp_all_c (combined)
First, flatten the data frame to remove the nested columns
(coordinates and location)
# Flatten the nested data-frame columns of the combined Yelp table into
# top-level "parent.child" columns (e.g. coordinates.latitude,
# location.address1) and convert the result to a tibble. Every later chunk
# reads yelp_flat_c, so it has to be created here.
yelp_flat_c <- yelp_all_c %>%
  jsonlite::flatten() %>%
  as_tibble()

# Sanity check: after flattening there is no `coordinates` column any more,
# so this is expected to return NULL with an "Unknown or uninitialised
# column" warning.
yelp_flat_c$coordinates %>% head()
Warning: Unknown or uninitialised column: `coordinates`.
NULL
concatenate the transaction column
# Turn the list-column `transactions` into a plain character column by
# joining each row's entries with ", ".
yelp_concat_c <- mutate(
  yelp_flat_c,
  transactions = map_chr(
    transactions,
    function(txn) str_c(txn, collapse = ", ")
  )
)

# Peek at the first rows of the result.
head(yelp_concat_c)
function to flatten the category column
# Collapse one element of the Yelp "categories" column into a single string.
#
# x: a data frame with columns "alias" and "title" (one row per category).
# Returns the values of x[["title"]] joined with ", ".
concate_list_c <- function(x) {
  str_c(x[["title"]], collapse = ", ")
}
# Replace the nested `categories` data frames with one comma-separated
# string of category titles per business. This uses concate_list_c, the
# helper defined in this notebook; the previous call referenced
# `concate_list`, which this file never defines.
yelp_flat2_c <- yelp_concat_c %>%
  mutate(categories = map_chr(categories, concate_list_c))

# Print with a very large width so columns are not cut off.
yelp_flat2_c %>% print(width = 1000)
removing duplicate values
# Keep one row per Yelp business id. `.keep_all = TRUE` retains all other
# columns; TRUE is spelled out because `T` is an ordinary variable that can
# be reassigned.
yelp_unique_c <- yelp_flat2_c %>%
  distinct(id, .keep_all = TRUE)

# Report how many rows the de-duplication removed.
glue::glue("Before dropping duplicated rows, there were {nrow(yelp_flat2_c)} rows. After dropping them, there are {nrow(yelp_unique_c)} rows") %>%
  print()
Before dropping duplicated rows, there were 2818 rows. After dropping them, there are 749 rows
Are there any NA values?
# Count the missing values in each column of the de-duplicated table; the
# result is a named numeric vector with one entry per column.
map_dbl(yelp_unique_c, function(column_values) {
  sum(is.na(column_values))
})
id alias
0 0
name image_url
0 0
is_closed url
0 0
review_count categories
0 0
rating transactions
0 0
price phone
208 0
display_phone distance
0 0
coordinates.latitude coordinates.longitude
0 0
location.address1 location.address2
5 161
location.address3 location.city
263 0
location.zip_code location.country
0 0
location.state location.display_address
0 0
# map_dbl() is a variant of map() that returns
# a numeric vector rather than a list.
There are 5 NA values in $location.address1, and more in the other
location address columns, but these don’t matter because there are no
missing values in the lat/long columns. NA values in $price may not
matter either for this study.
Try removing NA values in $price
# Experiment: drop every row whose `price` is NA. The result is stored in a
# separate object; the spatial step further down still starts from
# yelp_unique_c.
yelp_dropna2_c <- drop_na(yelp_unique_c, price)
This was a bad idea: all the ‘hiking’ sites had NA values in price, so
dropping them lost part of my data. UNDO
Checking for data cases outside of the Chatham County geographic
border
# The sf functions are namespace-qualified because this notebook only
# attaches tidyverse; calling them through sf:: also loads sf's methods
# (including `[` for sf objects) without needing library(sf).

# Census boundary shapefile, re-projected to EPSG:4326 so it shares a CRS
# with the Yelp points created below.
census <- sf::read_sf("/home/rstudio/Mod 1/chatham.shp") %>%
  sf::st_transform(4326)

# Convert yelp_unique_c into an sf point object built from the flattened
# longitude/latitude columns.
yelp_sf_c <- yelp_unique_c %>%
  sf::st_as_sf(
    coords = c("coordinates.longitude", "coordinates.latitude"),
    crs = 4326
  )

# Spatial subset: keep only the points that intersect the union of all
# boundary features.
yelp_in_c <- yelp_sf_c[census %>%
  sf::st_union(), , op = sf::st_intersects]
Compare tidy and messy data
# Compare the dimensions of the raw combined table (yelp_all_c) with those
# of the cleaned, spatially filtered table (yelp_in_c).
print(glue::glue("nrow before: {nrow(yelp_all_c)} -> nrow after: {nrow(yelp_in_c)} \n
ncol before: {ncol(yelp_all_c)} -> ncol after: {ncol(yelp_in_c)} \n"))
nrow before: 2818 -> nrow after: 709
ncol before: 16 -> ncol after: 23
Visualize
# Switch tmap to interactive viewing. The call is namespace-qualified
# because tmap is never attached in this notebook.
tmap::tmap_mode("view")
tmap mode set to interactive viewing
# Dot map of the filtered businesses, coloured by the `price` column.
# tmap:: is used because tmap is never attached in this notebook.
tmap::tm_shape(yelp_in_c) + tmap::tm_dots(col = "price")
More visualizations
# Switch tmap to interactive viewing. The call is namespace-qualified
# because tmap is never attached in this notebook.
tmap::tmap_mode("view")
tmap mode set to interactive viewing
# Dot map of the filtered businesses, coloured by the `rating` column.
# tmap:: is used because tmap is never attached in this notebook.
tmap::tm_shape(yelp_in_c) + tmap::tm_dots(col = "rating")
##The arrangement of food businesses in the gridded streets of Savannah
contrasts sharply with the more organic shorelines of coastal Chatham
County’s hiking trails and the inland restaurants in suburban Savannah.
While this could partially be seen before, after tidying the data the
pattern becomes clearer, emphasizing the relationship between urban form
and businesses. Looking at the relationship between price point and
rating seems to indicate that the density of businesses allows for more
high-star, high-price-point restaurants. The three main pockets of
high-review-count businesses are downtown Savannah, the area near the
Savannah/Hilton Head Airport, and Tybee Island, a popular vacation
destination. The largest grouping of 3-dollar-sign restaurants occurs in
downtown Savannah, especially by the waterfront, and in what is
considered the most touristy area. One interesting thing is that, looking
at the map of Chatham County zoomed all the way out, you can only see
1-dollar-sign and 2-dollar-sign restaurants, while on the ratings map
there appear to be more 3-4 and 4-5 star restaurants than any other
category. This indicates that Chatham County offers good value for money
in its dining scene. ##
LS0tCnRpdGxlOiAiUiBOb3RlYm9vayIKb3V0cHV0OiBodG1sX25vdGVib29rCi0tLQoKCiMgTG9hZCBsaWJyYXJpZXMgYW5kIFIgRGF0YSBmcm9tIGFzc2lnbm1lbnQgMQpgYGB7cn0KbGlicmFyeSh0aWR5dmVyc2UpCgpsb2FkKCIvaG9tZS9yc3R1ZGlvL01vZCAxLlJEYXRhIikKI3llbHBfYWxsIChyZXN0YXVyYW50cykKI3llbHBfYWxsMiAoaGlraW5nKQojeWVscF9hbGxfYyAoY29tYmluZWQpCmBgYAoKCiMgZmlyc3QgZmxhdHRlbiB0aGUgZGF0YSBmcmFtZSB0byByZW1vdmUgbmVzdGVkIGNvbHVtbnMgaW4gbG9jYXRpb24KYGBge3J9CnllbHBfZmxhdF9jIDwtIHllbHBfYWxsX2MgJT4lIAogIGpzb25saXRlOjpmbGF0dGVuKCkgJT4lIAogIGFzX3RpYmJsZSgpIAoKeWVscF9mbGF0X2MkY29vcmRpbmF0ZXMlPiUgaGVhZCgpCmBgYAoKIyBjb25jYXRlbmF0ZSB0aGUgdHJhbnNhY3Rpb24gY29sdW1uCmBgYHtyfQp5ZWxwX2NvbmNhdF9jIDwtIHllbHBfZmxhdF9jICU+JSAKICBtdXRhdGUodHJhbnNhY3Rpb25zID0gdHJhbnNhY3Rpb25zICU+JSAKICAgICAgICAgICBtYXBfY2hyKC4sIGZ1bmN0aW9uKHgpIHN0cl9jKHgsIGNvbGxhcHNlPSIsICIpKSkKaGVhZCh5ZWxwX2NvbmNhdF9jKQpgYGAKCiMgZnVuY3Rpb24gdG8gZmxhdHRlbiB0aGUgY2F0ZWdvcnkgY29sdW1uCmBgYHtyfQoKIyBDdXN0b20gZnVuY3Rpb24gdGhhdCB0YWtlcyB0aGUgZGF0YSBmcmFtZSBpbiAiY2F0ZWdvcmllcyIgY29sdW1uIGluIFllbHAgZGF0YQojIGFuZCByZXR1cm5zIGEgY2hhcmFjdGVyIHZlY3Rvcgpjb25jYXRlX2xpc3RfYyA8LSBmdW5jdGlvbih4KXsKICAjIHggaXMgYSBkYXRhIGZyYW1lIHdpdGggY29sdW1ucyAiYWxpYXMiIGFuZCAidGl0bGUiIGZyb20gWWVscCRjYXRlZ29yaWVzCiAgIyByZXR1cm5zIGEgY2hhcmFjdGVyIHZlY3RvciBjb250YWluaW5nIGNhdGVnb3J5IGNvbmNhdGVuYXRlZCB0aXRsZXMgCiAgdGl0bGVzIDwtIHhbWyJ0aXRsZSJdXSAlPiUgc3RyX2MoY29sbGFwc2UgPSAiLCAiKQogIHJldHVybih0aXRsZXMpCn0KCnllbHBfZmxhdDJfYyA8LSB5ZWxwX2NvbmNhdF9jICU+JSAKICBtdXRhdGUoY2F0ZWdvcmllcyA9IGNhdGVnb3JpZXMgJT4lIG1hcF9jaHIoY29uY2F0ZV9saXN0KSkKCnllbHBfZmxhdDJfYyAlPiUgcHJpbnQod2lkdGggPSAxMDAwKQpgYGAKCgojIHJlbW92aW5nIGR1cGxpY2F0ZSB2YWx1ZXMKYGBge3J9CnllbHBfdW5pcXVlX2MgPC0geWVscF9mbGF0Ml9jICU+JSAKICBkaXN0aW5jdChpZCwgLmtlZXBfYWxsPVQpCgpnbHVlOjpnbHVlKCJCZWZvcmUgZHJvcHBpbmcgZHVwbGljYXRlZCByb3dzLCB0aGVyZSB3ZXJlIHtucm93KHllbHBfZmxhdDJfYyl9IHJvd3MuIEFmdGVyIGRyb3BwaW5nIHRoZW0sIHRoZXJlIGFyZSB7bnJvdyh5ZWxwX3VuaXF1ZV9jKX0gcm93cyIpICU+JSAKICBwcmludCgpCmBgYAoKIyBBcmUgdGhlcmUgYW55IE5BIHZhbHVlcwpgYGB7cn0KeWVscF91bmlxdWVfYyAlPiUgCiAgbWFwX2Ri
bCguLCBmdW5jdGlvbih4KSBzdW0oaXMubmEoeCkpKSAKICAjIG1hcF9kYmwgaXMgYSB2YXJpYW50IG9mIG1hcCgpIHdoaWNoIG91dHB1dHMgCiAgIyBudW1lcmljIHZlY3RvciByYXRoZXIgdGhhbiBhIGxpc3QuCmBgYAojIFRoZXJlIGFyZSA1IE5BIHZhbHVlcyBpbiAkbG9jYXRpb24uYWRkcmVzczEgLSBhbmQgdGhlIG90aGVyIGxvY2F0aW9uIGFkZHJlc3MgY29sdW1ucyB3aGljaCBkb24ndCBtYXR0ZXIgZWl0aGVyIGJlY2F1c2UgdGhlcmUgYXJlIG5vIG1pc3NpbmcgdmFsdWVzIGZyb20gbGF0L2xvbmcgY29sdW1ucy4gTkEgdmFsdWVzIGluICRwcmljZSBtYXkgbm90IG1hdHRlciBlaXRoZXIgZm9yIHRoaXMgc3R1ZHkuCgojIFRyeSByZW1vdmluZyBOQSB2YWx1ZXMgaW4gJHByaWNlCmBgYHtyfQojeWVscF9kcm9wbmEyX2MgPC0geWVscF91bmlxdWVfYyAlPiUgCiAgI2Ryb3BfbmEocHJpY2UpCmBgYAojIFRoaXMgd2FzIGJhZCwgYWxsIHRoZSAnaGlraW5nJyBzaXRlcyBoYWQgTkEgdmFsdWVzIGluIHByaWNlIGFuZCBJIGxvc3QgcGFydCBvZiBteSBkYXRhLiBVTkRPCgoKIyBDaGVja2luZyBmb3IgZGF0YSBjYXNlcyBvdXRzaWRlIG9mIHRoZSBDaGF0aGFtIENvdW50eSBnZW9ncmFwaGljIGJvcmRlcgpgYGB7cn0KIyBjZW5zdXMgYm91bmRhcnkKY2Vuc3VzIDwtIHJlYWRfc2YoIi9ob21lL3JzdHVkaW8vTW9kIDEvY2hhdGhhbS5zaHAiKSAlPiUKICBzdF90cmFuc2Zvcm0oNDMyNikKCgojIENvbnZlcnRpbmcgeWVscF91bmlxdWVfYyBpbnRvIGEgc2Ygb2JqZWN0CnllbHBfc2ZfYyA8LSB5ZWxwX3VuaXF1ZV9jICU+JSAKICBzdF9hc19zZihjb29yZHM9YygiY29vcmRpbmF0ZXMubG9uZ2l0dWRlIiwgImNvb3JkaW5hdGVzLmxhdGl0dWRlIiksIGNycyA9IDQzMjYpCiAgCiMgc2Ygc3Vic2V0cwp5ZWxwX2luX2MgPC0geWVscF9zZl9jW2NlbnN1cyAlPiUgCiAgICAgICAgICAgICAgICAgICAgIHN0X3VuaW9uKCksICxvcCA9IHN0X2ludGVyc2VjdHNdCmBgYAoKCiMgQ29tcGFyZSB0aWR5IGFuZCBtZXNzeSBkYXRhCmBgYHtyfQpnbHVlOjpnbHVlKCJucm93IGJlZm9yZToge25yb3coeWVscF9hbGxfYyl9IC0+IG5yb3cgYWZ0ZXI6IHtucm93KHllbHBfaW5fYyl9IFxuCiAgICAgICAgICAgIG5jb2wgYmVmb3JlOiB7bmNvbCh5ZWxwX2FsbF9jKX0gLT4gbmNvbCBhZnRlcjoge25jb2woeWVscF9pbl9jKX0gXG4iKSAlPiUgCiAgcHJpbnQoKQpgYGAKCgojIFZpc3VhbGl6ZQpgYGB7cn0KdG1hcF9tb2RlKCJ2aWV3IikKdG1fc2hhcGUoeWVscF9pbl9jKSArIHRtX2RvdHMoY29sID0gInByaWNlIikKYGBgCgojIE1vcmUgdmlzdWFsaXphdGlvbnMKYGBge3J9CnRtYXBfbW9kZSgidmlldyIpCnRtX3NoYXBlKHllbHBfaW5fYykgKyB0bV9kb3RzKGNvbCA9ICJyYXRpbmciKQpgYGAKCiMjVGhlIGFycmFuZ2VtZW50IG9mIGZvb2QgYnVzaW5lc3MgaW4gdGhlIGdyaWRkZWQgc3RyZWV0cyBvZiBTYXZhbm5haCBjb250cmFzdCBzaGFycGx5IHdpdGgg
dGhlIG1vcmUgb3JnYW5pYyBzaG9yZWxpbmVzIG9mIGNvYXN0YWwgQ2hhdGhhbSBjb3VudHnigJlzIGhpa2luZyB0cmFpbHMgYW5kIHRoZSBpbmxhbmQgcmVzdGF1cmFudHMgaW4gc3VidXJiYW4gU2F2YW5uYWguIFdoaWxlIHRoaXMgY291bGQgcGFydGlhbGx5IGJlIHNlZW4gYmVmb3JlLCBhZnRlciB0aWR5aW5nIHRoZSBkYXRhLCB0aGUgcGF0dGVybiBiZWNvbWVzIGNsZWFyZXIsIGVtcGhhc2l6aW5nIHRoZSByZWxhdGlvbnNoaXAgYmV0d2VlbiB1cmJhbiBmb3JtIGFuZCBidXNpbmVzc2VzLiBMb29raW5nIGF0IHRoZSByZWxhdGlvbnNoaXAgYmV0d2VlbiBwcmljZSBwb2ludCBhbmQgcmF0aW5nIHNlZW1zIHRvIGluZGljYXRlIHRoYXQgdGhlIGRlbnNpdHkgb2YgYnVzaW5lc3NlcyBhbGxvd3MgZm9yIG1vcmUgaGlnaCBzdGFyLyBoaWdoIHByaWNlIHBvaW50IHJlc3RhdXJhbnRzLiBUaGUgdGhyZWUgbWFpbiBwb2NrZXRzIG9mIGhpZ2ggcmV2aWV3IGNvdW50IGJ1c2luZXNzZXMgYXJlIGRvd250b3duIFNhdmFubmFoLCBuZWFyIHRoZSBTYXZhbm5haC8gSGlsdG9uIEhlYWQgQWlycG9ydCwgYW5kIG9uIFR5YmVlIElzbGFuZCwgYSBwb3B1bGFyIHZhY2F0aW9uIGRlc3RpbmF0aW9uLiBUaGUgbGFyZ2VzdCBncm91cGluZyBvZiAzIGRvbGxhciBzaWduIHJlc3RhdXJhbnRzIG9jY3VycyBpbiBkb3dudG93biBTYXZhbm5haCwgZXNwZWNpYWxseSBieSB0aGUgd2F0ZXJmcm9udCwgYW5kIGluIHdoYXQgaXMgY29uc2lkZXJlZCB0aGUgbW9zdCB0b3VyaXN0eSBhcmVhLiBPbmUgdGhpbmcgdGhhdCBpcyBpbnRlcmVzdGluZyBpcyB0aGF0IGxvb2tpbmcgYXQgdGhlIG1hcCBvZiBDaGF0aGFtIGNvdW50eSB6b29tZWQgYWxsIHRoZSB3YXkgb3V0LCB5b3UgY2FuIG9ubHkgc2VlIDEgZG9sbGFyIHNpZ24gYW5kIDIgZG9sbGFyIHNpZ24gcmVzdGF1cmFudHMsIHdoaWxlIGxvb2tpbmcgYXQgdGhlIHJhdGluZ3MgbWFwLCB0aGVyZSBhcHBlYXIgdG8gYmUgbW9yZSAzLTQgYW5kIDQtNSBzdGFyIHJlc3RhdXJhbnRzIHRoYW4gYW55IG90aGVyIGNhdGVnb3J5LiBUaGlzIGluZGljYXRlcyB0aGF0IENoYXRoYW0gY291bnR5IGhhcyBhIGdvb2QgdmFsdWUgbW9uZXkgaW4gaXTigJlzIGRpbmluZyBzY2VuZS4gIyMKCgoK