Import Libraries
library(tidycensus)
library(sf)
library(tmap)
library(tidyverse)
library(here)
library(knitr)
library(kableExtra)
library(glue)
library(tigris)
# Load the raw POI data from the project root (path resolved by `here`)
data_all <- here::here("ithaca_poi_data_original.rds") %>%
  readRDS()
#2. Tidy my data
##2.1. Remove duplicated rows
# De-duplicate on the place identifier; `.keep_all = TRUE` retains every
# column of the first row seen for each `places.id`
data_all_unique <- dplyr::distinct(data_all, places.id, .keep_all = TRUE)

# Report how many rows the de-duplication removed
glue::glue("Before dropping duplicated rows, there were {nrow(data_all)} rows. After dropping them, there are {nrow(data_all_unique)} rows.")
## Before dropping duplicated rows, there were 103 rows. After dropping them, there are 18 rows.
##2.2. Flatten/unnest list-columns
# Turn the `places.types` list-column into one comma-separated string per POI
poi_flat <- data_all_unique %>%
  mutate(
    places.types = map_chr(
      places.types,
      function(type_tags) str_c(type_tags, collapse = ",")
    )
  )
## Handle the remaining list-column (`places.reviews`)
### Build a separate long-format review table keyed by `places.id`:
### one row per review, with each review's fields spread into columns
reviews <- poi_flat %>%
  select(places.id, places.reviews) %>%
  unnest_longer(places.reviews) %>%
  unnest_wider(places.reviews)

### The reviews now live in `reviews`, so drop the list-column from the POI table
poi_flat <- select(poi_flat, -places.reviews)
##2.3. Handle missing values
I first verified that the “rating” column is complete (no missing values). To analyze whether ratings vary with price, I then restricted the sample to POIs with a non-missing “priceLevel”. Next, I used “dineIn” as a proxy for restaurant service and excluded rows where “dineIn” is missing (treating those as not proper restaurants). Finally, because Cornell students frequently pick up food between classes, I further limited the sample to venues that offer takeout.
# Restrict the sample to POIs usable for the rating/price analysis:
#   1. drop rows with a missing value in any of the four analysis columns
#      (rating, price level, dine-in, takeout);
#   2. keep only venues that actually offer takeout. Dropping NA alone would
#      still retain rows where `places.takeout` is FALSE.
# NOTE(review): assumes `places.takeout` is a logical column - confirm.
poi_dropna <- poi_flat %>%
  drop_na(places.rating, places.priceLevel, places.dineIn, places.takeout) %>%
  filter(places.takeout)

# Report how many rows the restriction removed
glue::glue("Before dropping missing value rows, there were {nrow(poi_flat)} rows. After dropping them, there are {nrow(poi_dropna)} rows.")
## Before dropping missing value rows, there were 18 rows. After dropping them, there are 14 rows.
##2.4. Filter by location
# Boundary used for the location filter: the New York place named "Ithaca"
# from tigris, reprojected to EPSG:4326
ithaca <- tigris::places(state = "NY") %>%
  dplyr::filter(NAME == "Ithaca") %>%
  sf::st_transform(crs = 4326)
# Promote the POI table to an sf point layer built from its longitude/latitude
# columns, declared as EPSG:4326 to match `ithaca`
poi_sf <- sf::st_as_sf(
  poi_dropna,
  coords = c("places.location.longitude", "places.location.latitude"),
  crs = 4326
)

# Keep only the points that intersect the city boundary
# (st_intersects is the default predicate of sf's spatial `[`)
poi_sf_in <- poi_sf[ithaca, , op = sf::st_intersects]

# Report how many POIs fell outside the boundary
glue::glue("Before dropping POIs outside of Ithaca, there were {nrow(poi_sf)} rows. After dropping them, there are {nrow(poi_sf_in)} rows.")
## Before dropping POIs outside of Ithaca, there were 14 rows. After dropping them, there are 12 rows.
## Remove language-code and review-summary columns that the analysis never uses
poi_clean <- select(
  poi_sf_in,
  -c(
    places.displayName.languageCode,
    places.reviewSummary.flagContentUri,
    places.reviewSummary.reviewsUri,
    places.reviewSummary.text.text,
    places.reviewSummary.text.languageCode,
    places.reviewSummary.disclosureText.text,
    places.reviewSummary.disclosureText.languageCode
  )
)
## Preview the first 10 cleaned POIs as a styled table
kableExtra::kable_styling(
  kableExtra::kable(head(poi_clean, n = 10)),
  full_width = FALSE
)
places.id | places.types | places.formattedAddress | places.rating | places.priceLevel | places.userRatingCount | places.takeout | places.dineIn | places.delivery | places.displayName.text | geometry |
---|---|---|---|---|---|---|---|---|---|---|
ChIJd5TQDg6B0IkRtBMqZ4QOWyo | chinese_restaurant,japanese_restaurant,restaurant,point_of_interest,food,establishment | 106 Fairgrounds Memorial Pkwy, Ithaca, NY 14850, USA | 3.9 | PRICE_LEVEL_INEXPENSIVE | 1204 | TRUE | TRUE | NA | Spring Buffet | POINT (-76.51203 42.43081) |
ChIJaeZOwqCB0IkRn2nA111V5-A | restaurant,sushi_restaurant,ramen_restaurant,american_restaurant,japanese_restaurant,point_of_interest,food,establishment | 722 S Meadow St Suit 900, Ithaca, NY 14850, USA | 4.8 | PRICE_LEVEL_MODERATE | 151 | TRUE | TRUE | TRUE | Pokelava Poke Bowl - Downtown | POINT (-76.50925 42.4308) |
ChIJx_f-hbGB0IkRZKH7lb_57PI | sushi_restaurant,ramen_restaurant,tea_house,japanese_restaurant,restaurant,point_of_interest,food,establishment | 740 S Meadow St, Ithaca, NY 14850, USA | 4.2 | PRICE_LEVEL_INEXPENSIVE | 245 | TRUE | TRUE | TRUE | Taichi Bubble Tea - Ithaca | POINT (-76.50849 42.4301) |
ChIJlYR7IGiB0IkRbvPxgZHICPE | ramen_restaurant,asian_restaurant,korean_restaurant,japanese_restaurant,restaurant,point_of_interest,food,establishment | 512 W State St, Ithaca, NY 14850, USA | 4.6 | PRICE_LEVEL_MODERATE | 927 | TRUE | TRUE | TRUE | Maru Ramen | POINT (-76.5068 42.4395) |
ChIJm-ch1pCB0IkRYkLCcVuEr_g | korean_restaurant,asian_restaurant,restaurant,point_of_interest,food,establishment | 124 E State St, Ithaca, NY 14850, USA | 4.3 | PRICE_LEVEL_MODERATE | 261 | TRUE | TRUE | TRUE | Kimchi | POINT (-76.49808 42.43981) |
ChIJ_yx8Eo6B0IkRRfLPVFdcLPU | korean_restaurant,restaurant,point_of_interest,food,establishment | 147 Dryden Rd, Ithaca, NY 14850, USA | 4.1 | PRICE_LEVEL_MODERATE | 118 | TRUE | TRUE | TRUE | So Poong | POINT (-76.48597 42.44146) |
ChIJT587Po6B0IkRiE2s9_l8u1c | japanese_restaurant,sushi_restaurant,asian_restaurant,restaurant,point_of_interest,food,establishment | 113 Dryden Rd, Ithaca, NY 14850, USA | 3.7 | PRICE_LEVEL_MODERATE | 152 | TRUE | TRUE | TRUE | Plum Tree Restaurant | POINT (-76.48656 42.44155) |
ChIJH0xAa46B0IkRzZXALva_JXc | korean_restaurant,chinese_restaurant,restaurant,point_of_interest,food,establishment | 104 Dryden Rd, Ithaca, NY 14850, USA | 3.9 | PRICE_LEVEL_MODERATE | 82 | TRUE | TRUE | TRUE | Gangnam Station | POINT (-76.48639 42.44174) |
ChIJrUH84Y2B0IkRVl6DSs39uPA | korean_restaurant,asian_restaurant,restaurant,point_of_interest,food,establishment | 321 College Ave, Ithaca, NY 14850, USA | 4.0 | PRICE_LEVEL_MODERATE | 274 | TRUE | TRUE | TRUE | Koko | POINT (-76.48503 42.44131) |
ChIJo8Lj3o2B0IkRM5Lp3TT2GRs | japanese_restaurant,restaurant,point_of_interest,food,establishment | 401 College Ave, Ithaca, NY 14850, USA | 4.3 | PRICE_LEVEL_INEXPENSIVE | 220 | TRUE | TRUE | TRUE | Oishii Bowl | POINT (-76.48512 42.44172) |
# Switch tmap to interactive ("view") mode; this applies to every map below
tmap_mode("view")

# Map (Korean and Japanese restaurants)
# Label each POI by cuisine from its collapsed `places.types` tags.
# `case_when()` returns the first matching branch, so a POI tagged as both
# Japanese and Korean is labelled "Japanese"; anything else becomes "Other".
poi_viz <- poi_clean %>%
  mutate(
    cuisine = case_when(
      grepl("japanese_restaurant", places.types, fixed = TRUE) ~ "Japanese",
      grepl("korean_restaurant", places.types, fixed = TRUE) ~ "Korean",
      TRUE ~ "Other"
    )
  )

# Dots coloured by cuisine, with the city boundary drawn on top.
# The palette lists a colour for every label `cuisine` can take, including
# "Other", so an unmatched POI does not end up with a recycled colour.
tm_shape(poi_viz) +
  tm_dots(
    col = "cuisine",
    size = 0.7,
    palette = c("Japanese" = "red",
                "Korean" = "blue",
                "Other" = "grey50"),
    popup.vars = c("Name" = "places.displayName.text",
                   "Rating" = "places.rating",
                   "Rating Count" = "places.userRatingCount")
  ) +
  tm_shape(ithaca) +
  tm_borders()
It appears that Korean restaurants are clustered near Cornell University, on the east side of Ithaca. Interestingly, there are none in the southwest part of the city, which is the main grocery and shopping area. Since that area is not part of students’ everyday routines, Korean restaurant owners may have chosen to locate closer to where students spend most of their time, ensuring greater exposure in daily life. By contrast, three Japanese restaurants are situated in the southwest area. This might reflect that Japanese food is somewhat more familiar to the general public in Ithaca, making it reasonable for restaurants to locate near markets.
# Map: rating (dot colour) against number of user ratings (dot size),
# with the city boundary drawn on top
tm_shape(poi_clean) +
  tm_dots(
    size = "places.userRatingCount",
    col = "places.rating",
    palette = "magma",
    popup.vars = c(
      "Name" = "places.displayName.text",
      "Rating" = "places.rating",
      "Rating Count" = "places.userRatingCount"
    )
  ) +
  tm_shape(ithaca) +
  tm_borders()
Among the restaurants, Maru Ramen in the Downtown area stood out with both a high rating and a large number of customer reviews. In contrast, most restaurants in the eastern part of the city had fewer reviews and ratings below 4.2. Overall, there seems to be a rough pattern where a larger number of reviews corresponds to higher ratings, suggesting that satisfied customers may be more inclined to leave feedback.
# Map: rating (dot colour) against price level (dot size)
# `places.priceLevel` holds enum strings such as "PRICE_LEVEL_MODERATE".
# Mapped to `size` directly, a character column is either rejected as
# non-numeric or ordered alphabetically (EXPENSIVE < INEXPENSIVE < MODERATE),
# so dot size would not reflect price. Convert it to a numeric rank first.
price_level_order <- c(
  "PRICE_LEVEL_FREE",
  "PRICE_LEVEL_INEXPENSIVE",
  "PRICE_LEVEL_MODERATE",
  "PRICE_LEVEL_EXPENSIVE",
  "PRICE_LEVEL_VERY_EXPENSIVE"
)

# `match()` gives 1 (free) to 5 (very expensive); any value not listed above
# becomes NA
poi_price <- poi_clean %>%
  mutate(price_rank = match(places.priceLevel, price_level_order))

# NOTE(review): `size.scale = 0.1` is kept exactly as written. tmap v3 names
# the symbol-size multiplier `scale`, while tmap v4 expects a `tm_scale_*()`
# object for `size.scale` - confirm against the installed tmap version.
tm_shape(poi_price) +
  tm_dots(col = "places.rating",
          size = "price_rank",
          size.scale = 0.1,
          palette = "magma",
          popup.vars = c("Name" = "places.displayName.text",
                         "Rating" = "places.rating",
                         "Price Level" = "places.priceLevel",
                         "Rating Count" = "places.userRatingCount")) +
  tm_shape(ithaca) +
  tm_borders()
Overall, the price levels of restaurants are fairly similar. However, those in the southwest area tend to be slightly less expensive. There are two restaurants with ratings above 4.6: one located in the southwest with a lower price level, and another in the east with a higher price level. Given the wide range of ratings in the eastern area, there does not appear to be a clear relationship between restaurant price level and rating.