Load the Google Places POI data you downloaded for Mini-Assignment 1. As a reminder, state the city you selected and the two POI types you chose in the previous assignment.
`asian_grocery_store` and `hiking_area`.

# Import data
# Read the saved Google Places results, one RDS file per POI type
asian_grocery_store <- readRDS("asian_grocery_store.rds")
hiking_area <- readRDS("hiking_area.rds")
# Show the first two Asian grocery store records as a table. The nested
# review list and the review-summary text are left out of the preview.
asian_grocery_store %>%
  select(-c(places.reviews, places.reviewSummary.text.text)) %>%
  head(n = 2) %>%
  kable()
places.id | places.types | places.formattedAddress | places.rating | places.businessStatus | places.userRatingCount | places.primaryType | places.location.latitude | places.location.longitude | places.displayName.text | places.displayName.languageCode | places.reviewSummary.flagContentUri | places.reviewSummary.reviewsUri | places.reviewSummary.text.languageCode | places.reviewSummary.disclosureText.text | places.reviewSummary.disclosureText.languageCode |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
ChIJt8RzSHh644kRLJD0BSpMMzs | market , asian_grocery_store, grocery_store , food_store , store , food , point_of_interest , establishment | 11 Hudson St Unit #A, Boston, MA 02111, USA | 4.1 | OPERATIONAL | 20 | market | 42.35086 | -71.06018 | Happy Family Food Market | en | NA | NA | NA | NA | NA |
ChIJs8yo23F644kRScIIr53rSt0 | asian_grocery_store, supermarket , grocery_store , food_store , store , food , point_of_interest , establishment | 1102 Washington St, Boston, MA 02118, USA | 4.0 | OPERATIONAL | 801 | asian_grocery_store | 42.34423 | -71.06563 | New Ming | en | https://www.google.com/local/review/rap/report?postId=5%401:CAIQACodChtyc19oOlQyTVMxZW9FNlFfZ3htZXJXSS1RSFE%7CCAIQACorChtyc19oOlQyTVMxZW9FNlFfZ3htZXJXSS1RSFESDAi_od_FBhCv-Z2VAQ&d=17924085&t=8 | https://www.google.com/maps/place//data=!4m4!3m3!1s0x89e37a71dba8ccb3:0xdd4aeb9daf08c249!9m1!1b1 | en-US | Summarized with Gemini | en-US |
# Show the first two hiking area records as a table. The nested review list
# and the review-summary text are left out of the preview.
hiking_area %>%
  select(-c(places.reviews, places.reviewSummary.text.text)) %>%
  head(n = 2) %>%
  kable()
places.id | places.types | places.formattedAddress | places.rating | places.businessStatus | places.userRatingCount | places.primaryType | places.location.latitude | places.location.longitude | places.displayName.text | places.displayName.languageCode | places.reviewSummary.flagContentUri | places.reviewSummary.reviewsUri | places.reviewSummary.text.languageCode | places.reviewSummary.disclosureText.text | places.reviewSummary.disclosureText.languageCode |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
ChIJPyGMcS9u44kRja36dM05hEQ | tourist_attraction , hiking_area , state_park , park , sports_activity_location, point_of_interest , establishment | 1399 Bennington St, Boston, MA 02128, USA | 4.5 | OPERATIONAL | 554 | park | 42.3915 | -70.99343 | Belle Isle Marsh Reservation | en | NA | NA | NA | NA | NA |
ChIJPyGMcS9u44kRja36dM05hEQ | tourist_attraction , state_park , hiking_area , park , sports_activity_location, point_of_interest , establishment | 1399 Bennington St, Boston, MA 02128, USA | 4.5 | OPERATIONAL | 554 | park | 42.3915 | -70.99343 | Belle Isle Marsh Reservation | en | NA | NA | NA | NA | NA |
Work through the following steps to clean and prepare your dataset:

- Flatten the `places.types` column so that each element contains a single string value.
- If a POI has multiple values in `places.types`, handle them appropriately while ensuring each row still represents a unique POI.
- Handle `NA` values in columns that you consider important. Explain your reasoning. Report how many rows remain after this step.

# 1. Remove duplicated rows
# Keep one row per Google place ID. `.keep_all = TRUE` retains every other
# column of the row that is kept; `TRUE` is spelled out because `T` is an
# ordinary variable that can be reassigned.
grocery_unique <- asian_grocery_store %>%
  distinct(places.id, .keep_all = TRUE)
# Report the row count before and after removing duplicates
glue::glue("(Asian grocery store) Before dropping duplicated rows, there were {nrow(asian_grocery_store)} rows. After dropping them, there are {nrow(grocery_unique)} rows.")
## (Asian grocery store) Before dropping duplicated rows, there were 9 rows. After dropping them, there are 9 rows.
# 2. Flatten/unnest list-columns
# The data has two list-columns: places.types and places.reviews
# 2.1 places.types: look at the first element (a character vector of types)
grocery_unique$places.types[[1]]
## [1] "market" "asian_grocery_store" "grocery_store"
## [4] "food_store" "store" "food"
## [7] "point_of_interest" "establishment"
# Collapse each place's type vector into a single comma-separated string
grocery_flat <- mutate(
  grocery_unique,
  places.types = map_chr(
    places.types,
    function(types) str_c(types, collapse = ",")
  )
)
head(grocery_flat$places.types)
## [1] "market,asian_grocery_store,grocery_store,food_store,store,food,point_of_interest,establishment"
## [2] "asian_grocery_store,supermarket,grocery_store,food_store,store,food,point_of_interest,establishment"
## [3] "asian_grocery_store,grocery_store,food_store,store,food,point_of_interest,establishment"
## [4] "store,asian_grocery_store,grocery_store,food_store,food,point_of_interest,establishment"
## [5] "supermarket,asian_grocery_store,grocery_store,food_store,store,food,point_of_interest,establishment"
## [6] "asian_grocery_store,supermarket,grocery_store,food_store,store,food,point_of_interest,establishment"
# 2.2 places.reviews: look at the reviews attached to the first place
grocery_flat$places.reviews[[1]]
## name
## 1 places/ChIJt8RzSHh644kRLJD0BSpMMzs/reviews/ChZDSUhNMG9nS0VJQ0FnSUNMb2I2a1RBEAE
## 2 places/ChIJt8RzSHh644kRLJD0BSpMMzs/reviews/ChdDSUhNMG9nS0VJQ0FnSUNtXzYyQmtBRRAB
## 3 places/ChIJt8RzSHh644kRLJD0BSpMMzs/reviews/ChZDSUhNMG9nS0VJQ0FnSUN3a2JlWlN3EAE
## 4 places/ChIJt8RzSHh644kRLJD0BSpMMzs/reviews/ChdDSUhNMG9nS0VJQ0FnSURpOUkyRTR3RRAB
## 5 places/ChIJt8RzSHh644kRLJD0BSpMMzs/reviews/ChdDSUhNMG9nS0VJQ0FnSURDdjZuRHdBRRAB
## relativePublishTimeDescription rating publishTime
## 1 a year ago 5 2024-06-19T23:56:37.695482Z
## 2 3 years ago 5 2022-01-14T23:37:59.436873Z
## 3 10 years ago 5 2014-11-18T20:23:48.778Z
## 4 4 years ago 3 2020-11-26T18:34:05.645401Z
## 5 4 years ago 5 2020-10-17T07:15:56.431054Z
## flagContentUri
## 1 https://www.google.com/local/review/rap/report?postId=ChZDSUhNMG9nS0VJQ0FnSUNMb2I2a1RBEAE&d=17924085&t=1
## 2 https://www.google.com/local/review/rap/report?postId=ChdDSUhNMG9nS0VJQ0FnSUNtXzYyQmtBRRAB&d=17924085&t=1
## 3 https://www.google.com/local/review/rap/report?postId=ChZDSUhNMG9nS0VJQ0FnSUN3a2JlWlN3EAE&d=17924085&t=1
## 4 https://www.google.com/local/review/rap/report?postId=ChdDSUhNMG9nS0VJQ0FnSURpOUkyRTR3RRAB&d=17924085&t=1
## 5 https://www.google.com/local/review/rap/report?postId=ChdDSUhNMG9nS0VJQ0FnSURDdjZuRHdBRRAB&d=17924085&t=1
## googleMapsUri
## 1 https://www.google.com/maps/reviews/data=!4m6!14m5!1m4!2m3!1sChZDSUhNMG9nS0VJQ0FnSUNMb2I2a1RBEAE!2m1!1s0x89e37a784873c4b7:0x3b334c2a05f4902c
## 2 https://www.google.com/maps/reviews/data=!4m6!14m5!1m4!2m3!1sChdDSUhNMG9nS0VJQ0FnSUNtXzYyQmtBRRAB!2m1!1s0x89e37a784873c4b7:0x3b334c2a05f4902c
## 3 https://www.google.com/maps/reviews/data=!4m6!14m5!1m4!2m3!1sChZDSUhNMG9nS0VJQ0FnSUN3a2JlWlN3EAE!2m1!1s0x89e37a784873c4b7:0x3b334c2a05f4902c
## 4 https://www.google.com/maps/reviews/data=!4m6!14m5!1m4!2m3!1sChdDSUhNMG9nS0VJQ0FnSURpOUkyRTR3RRAB!2m1!1s0x89e37a784873c4b7:0x3b334c2a05f4902c
## 5 https://www.google.com/maps/reviews/data=!4m6!14m5!1m4!2m3!1sChdDSUhNMG9nS0VJQ0FnSURDdjZuRHdBRRAB!2m1!1s0x89e37a784873c4b7:0x3b334c2a05f4902c
## text.text
## 1 This is the best market to go to in Chinatown if you're looking to pick up fresh seafood. Yes the seafood here may be pricier than other markets around in Chinatown (e.g the live scallops are sold at $6.99 a pound here whereas Cmart down the street has them for $4.99 a pound) but the quality here is far superior and worth every penny.\n\nThe market is really small and they only specialize in selling one thing... seafood! One of the reasons I think the quality is significantly better here is that they only receive very small shipments of seafood each day so the turnover is high and so you're always getting the freshest seafood available.\n\nLove this place!
## 2 I've been here twice and they are amazing! We bought some live lovster which were delicious. The fresh oysters are delicious! Super friendly!
## 3 Very good seafood here. much better quality than other grocery store.
## 4 Place too small but can get any kind seafood and fresh..
## 5 Tiny store but packed with fresh Seafoods.
## text.languageCode
## 1 en
## 2 en
## 3 en
## 4 en
## 5 en
## originalText.text
## 1 This is the best market to go to in Chinatown if you're looking to pick up fresh seafood. Yes the seafood here may be pricier than other markets around in Chinatown (e.g the live scallops are sold at $6.99 a pound here whereas Cmart down the street has them for $4.99 a pound) but the quality here is far superior and worth every penny.\n\nThe market is really small and they only specialize in selling one thing... seafood! One of the reasons I think the quality is significantly better here is that they only receive very small shipments of seafood each day so the turnover is high and so you're always getting the freshest seafood available.\n\nLove this place!
## 2 I've been here twice and they are amazing! We bought some live lovster which were delicious. The fresh oysters are delicious! Super friendly!
## 3 Very good seafood here. much better quality than other grocery store.
## 4 Place too small but can get any kind seafood and fresh..
## 5 Tiny store but packed with fresh Seafoods.
## originalText.languageCode authorAttribution.displayName
## 1 en Eva Yuen
## 2 en Michael lewandowski
## 3 en Da Huang
## 4 en Tak Hing
## 5 en Sieu Nhan
## authorAttribution.uri
## 1 https://www.google.com/maps/contrib/108706777465914056073/reviews
## 2 https://www.google.com/maps/contrib/110465398503808148687/reviews
## 3 https://www.google.com/maps/contrib/106371705848290808624/reviews
## 4 https://www.google.com/maps/contrib/106400717313026803429/reviews
## 5 https://www.google.com/maps/contrib/101987590175193253847/reviews
## authorAttribution.photoUri
## 1 https://lh3.googleusercontent.com/a/ACg8ocKz2LGoq9jDni3z0-gc0_fY3aecDx7Mjn-IQwL697tAoLWZ1PJw=s128-c0x00000000-cc-rp-mo-ba3
## 2 https://lh3.googleusercontent.com/a/ACg8ocItpcOm-hu0jQREtK2gjJSto5OBLvhHAdWFXr61HVC5FuQESQ=s128-c0x00000000-cc-rp-mo
## 3 https://lh3.googleusercontent.com/a-/ALV-UjUW8cQnOIf7mJxc5vjTtY3qf1ADaiePbm4U3A20sI7FubM9KPlS=s128-c0x00000000-cc-rp-mo
## 4 https://lh3.googleusercontent.com/a/ACg8ocLlzT1_qH_omkXFkFnv7AMQ8jXYLSJekz_zj6BvIh8Kzdm1ow=s128-c0x00000000-cc-rp-mo
## 5 https://lh3.googleusercontent.com/a/ACg8ocJoksQ_cSQweOcpR6koB2ex7fQTPqcRzXYm08isr0mScyG4aA=s128-c0x00000000-cc-rp-mo-ba4
# Build a separate review-level table keyed by place ID:
# unnest_longer() gives one row per review, then unnest_wider() spreads the
# review fields into their own columns
grocery_reviews <- grocery_flat %>%
  select(places.id, places.reviews) %>%
  unnest_longer(places.reviews) %>%
  unnest_wider(places.reviews)
# The reviews now live in grocery_reviews, so drop the list-column from the
# POI-level table
grocery_flat <- select(grocery_flat, -places.reviews)
# 3. Handle missing values
# Tally the missing values in every column
map_dbl(grocery_flat, function(column) sum(is.na(column)))
## places.id
## 0
## places.types
## 0
## places.formattedAddress
## 0
## places.rating
## 1
## places.businessStatus
## 0
## places.userRatingCount
## 1
## places.primaryType
## 0
## places.location.latitude
## 0
## places.location.longitude
## 0
## places.displayName.text
## 0
## places.displayName.languageCode
## 0
## places.reviewSummary.flagContentUri
## 7
## places.reviewSummary.reviewsUri
## 7
## places.reviewSummary.text.text
## 7
## places.reviewSummary.text.languageCode
## 7
## places.reviewSummary.disclosureText.text
## 7
## places.reviewSummary.disclosureText.languageCode
## 7
Essential columns such as places.id
and
places.types
do not contain any missing values. However,
places.rating
is an important variable because it directly
reflects the quality and popularity of a place. I plan to use it as a
key criterion for deciding which places I would visit, so rows with
missing values in this column would not be useful for my analysis.
# Drop rows that have missing values in places.rating
grocery_dropna <- grocery_flat %>%
  drop_na(places.rating)
# Row count going into the filter
print(paste0("Before: ", nrow(grocery_flat)))
## [1] "Before: 9"
# Row count once the rows with a missing rating are removed
print(paste0("After: ", nrow(grocery_dropna)))
## [1] "After: 8"
# 4. Filter by location
# Boston's boundary: the Massachusetts place named "Boston" as returned by
# tigris, reprojected to EPSG:4326 so it shares a CRS with the POI points
boston <- tigris::places("MA", progress_bar = FALSE) %>%
  filter(NAME == "Boston") %>%
  st_transform(crs = 4326)
# Turn the cleaned table into point features built from the
# longitude/latitude columns
grocery_sf <- st_as_sf(
  grocery_dropna,
  coords = c("places.location.longitude", "places.location.latitude"),
  crs = 4326
)
# Spatial subset: keep only the stores that intersect the city boundary
grocery_sf_in <- grocery_sf[boston, ]
print(paste0("Before: ", nrow(grocery_sf)))
## [1] "Before: 8"
print(paste0("After: ", nrow(grocery_sf_in)))
## [1] "After: 8"
# Compare the raw (messy) data with the cleaned (tidy) data: number of rows
# and number of columns before vs. after cleaning
glue::glue("number of rows before: {nrow(asian_grocery_store)} -> after: {nrow(grocery_sf_in)} \n
number of columns before: {ncol(asian_grocery_store)} -> after: {ncol(grocery_sf_in)} \n")
## number of rows before: 9 -> after: 8
##
## number of columns before: 18 -> after: 16
I subsequently applied the same cleaning steps and reasoning to the
hiking area
type data to ensure consistency across
datasets.
# 1. Remove duplicated rows
# Keep one row per Google place ID. `.keep_all = TRUE` retains every other
# column of the row that is kept; `TRUE` is spelled out because `T` is an
# ordinary variable that can be reassigned.
hiking_unique <- hiking_area %>%
  distinct(places.id, .keep_all = TRUE)
# Report the row count before and after removing duplicates
glue::glue("(Hiking area) Before dropping duplicated rows, there were {nrow(hiking_area)} rows. After dropping them, there are {nrow(hiking_unique)} rows.")
## (Hiking area) Before dropping duplicated rows, there were 31 rows. After dropping them, there are 10 rows.
# 2. Flatten/unnest list-columns
# The data has two list-columns: places.types and places.reviews
# 2.1 places.types: look at the first element (a character vector of types)
hiking_unique$places.types[[1]]
## [1] "tourist_attraction" "hiking_area"
## [3] "state_park" "park"
## [5] "sports_activity_location" "point_of_interest"
## [7] "establishment"
# Collapse each place's type vector into a single comma-separated string
hiking_flat <- mutate(
  hiking_unique,
  places.types = map_chr(
    places.types,
    function(types) str_c(types, collapse = ",")
  )
)
head(hiking_flat$places.types)
## [1] "tourist_attraction,hiking_area,state_park,park,sports_activity_location,point_of_interest,establishment"
## [2] "hiking_area,park,sports_activity_location,point_of_interest,establishment"
## [3] "hiking_area,park,sports_activity_location,point_of_interest,establishment"
## [4] "hiking_area,park,sports_activity_location,point_of_interest,establishment"
## [5] "hiking_area,park,sports_activity_location,point_of_interest,establishment"
## [6] "hiking_area,park,sports_activity_location,point_of_interest,establishment"
# 2.2 places.reviews: look at the reviews attached to the first place
hiking_flat$places.reviews[[1]]
## name
## 1 places/ChIJPyGMcS9u44kRja36dM05hEQ/reviews/Ci9DQUlRQUNvZENodHljRjlvT2xOS09YRndjbWh3Vm10a05XSkZNalExUkc1UVVWRRAB
## 2 places/ChIJPyGMcS9u44kRja36dM05hEQ/reviews/Ci9DQUlRQUNvZENodHljRjlvT25OdWRFeEliemR3U2pFekxUSlJkbEUxWVdWVVIwRRAB
## 3 places/ChIJPyGMcS9u44kRja36dM05hEQ/reviews/Ci9DQUlRQUNvZENodHljRjlvT2xaUlZIaFplamRNTTAxamJGa3daalUzTVZsMVNVRRAB
## 4 places/ChIJPyGMcS9u44kRja36dM05hEQ/reviews/ChZDSUhNMG9nS0VPYWxfdS1nMGJxSVVBEAE
## 5 places/ChIJPyGMcS9u44kRja36dM05hEQ/reviews/ChZDSUhNMG9nS0VJQ0FnSUNINThxeld3EAE
## relativePublishTimeDescription rating publishTime
## 1 2 months ago 5 2025-07-13T19:03:11.750444833Z
## 2 2 months ago 5 2025-06-30T16:49:43.712264971Z
## 3 3 weeks ago 5 2025-08-18T02:33:49.801397839Z
## 4 3 months ago 2 2025-05-25T03:13:10.364995Z
## 5 a year ago 5 2024-09-09T19:15:30.367116Z
## flagContentUri
## 1 https://www.google.com/local/review/rap/report?postId=Ci9DQUlRQUNvZENodHljRjlvT2xOS09YRndjbWh3Vm10a05XSkZNalExUkc1UVVWRRAB&d=17924085&t=1
## 2 https://www.google.com/local/review/rap/report?postId=Ci9DQUlRQUNvZENodHljRjlvT25OdWRFeEliemR3U2pFekxUSlJkbEUxWVdWVVIwRRAB&d=17924085&t=1
## 3 https://www.google.com/local/review/rap/report?postId=Ci9DQUlRQUNvZENodHljRjlvT2xaUlZIaFplamRNTTAxamJGa3daalUzTVZsMVNVRRAB&d=17924085&t=1
## 4 https://www.google.com/local/review/rap/report?postId=ChZDSUhNMG9nS0VPYWxfdS1nMGJxSVVBEAE&d=17924085&t=1
## 5 https://www.google.com/local/review/rap/report?postId=ChZDSUhNMG9nS0VJQ0FnSUNINThxeld3EAE&d=17924085&t=1
## googleMapsUri
## 1 https://www.google.com/maps/reviews/data=!4m6!14m5!1m4!2m3!1sCi9DQUlRQUNvZENodHljRjlvT2xOS09YRndjbWh3Vm10a05XSkZNalExUkc1UVVWRRAB!2m1!1s0x89e36e2f718c213f:0x448439cd74faad8d
## 2 https://www.google.com/maps/reviews/data=!4m6!14m5!1m4!2m3!1sCi9DQUlRQUNvZENodHljRjlvT25OdWRFeEliemR3U2pFekxUSlJkbEUxWVdWVVIwRRAB!2m1!1s0x89e36e2f718c213f:0x448439cd74faad8d
## 3 https://www.google.com/maps/reviews/data=!4m6!14m5!1m4!2m3!1sCi9DQUlRQUNvZENodHljRjlvT2xaUlZIaFplamRNTTAxamJGa3daalUzTVZsMVNVRRAB!2m1!1s0x89e36e2f718c213f:0x448439cd74faad8d
## 4 https://www.google.com/maps/reviews/data=!4m6!14m5!1m4!2m3!1sChZDSUhNMG9nS0VPYWxfdS1nMGJxSVVBEAE!2m1!1s0x89e36e2f718c213f:0x448439cd74faad8d
## 5 https://www.google.com/maps/reviews/data=!4m6!14m5!1m4!2m3!1sChZDSUhNMG9nS0VJQ0FnSUNINThxeld3EAE!2m1!1s0x89e36e2f718c213f:0x448439cd74faad8d
## text.text
## 1 Wow! This place is spectacularly gorgeous! I especially love the wildflowers and walkway, and that no dogs are allowed. There are so few places where dogs are not allowed and I can let down my guard and just enjoy!\n\nThere were butterflies and hummingbirds and bees and dragonflies…it was magnificent. Toward the end I did pop my head into the marsh areas. This was a terrible mistake! There were swarms of mosquitoes like I’ve never seen!!! I was mobbed and even as I squashed at least a dozen, more bit me on my face, neck, chest, arms, and legs. I had to hustle away! Wear spray!!!!!\n\nI appreciate that the .5 mile trail around is super accessible, dogs must be leashed on it, and there is even a handicapped accessible portable toilet!\n\nThere are benches and picnic tables and the breeze is spectacularly refreshing. Some good shade too!
## 2 I’m a photographer and i absolutely love having sessions here. This place is huge with so much grounds to explore!\n\nFree parking\n\nApproach entrance slowly as it is so easy to miss.\n\nNote the park doesnt actually close at 4, it closes at dusk!\n\nFollow my photography on IG @lnportraits.co
## 3 This park is such a hidden gem and it’s great for a picnic day with friends or just a nice walk to clear your head.
## 4 Many mosquitoes I recommend not wearing short clothing, please do not bring your pets here after a walk with my dog when I returned home I found 12 ticks to be exact.
## 5 This is a beautiful ecosystem. It's so hard finding a place like this in Boston. Matter of fact, there are no other places like this in Boston. The silence, the smell of nature, the hints and peeks of wildlife around you, it's all just a wonderful escape from the hustle and bustle of the city. It's been a long time since I've been in a place that's so alive. Parks and forests lack the biodiversity and fauna that the marsh biome provides. Make yourself a favor and take some time to disconnect and explore this site. Make sure to go into the meadow, too! (That's where the wildlife is)
## text.languageCode
## 1 en
## 2 en
## 3 en
## 4 en
## 5 en
## originalText.text
## 1 Wow! This place is spectacularly gorgeous! I especially love the wildflowers and walkway, and that no dogs are allowed. There are so few places where dogs are not allowed and I can let down my guard and just enjoy!\n\nThere were butterflies and hummingbirds and bees and dragonflies…it was magnificent. Toward the end I did pop my head into the marsh areas. This was a terrible mistake! There were swarms of mosquitoes like I’ve never seen!!! I was mobbed and even as I squashed at least a dozen, more bit me on my face, neck, chest, arms, and legs. I had to hustle away! Wear spray!!!!!\n\nI appreciate that the .5 mile trail around is super accessible, dogs must be leashed on it, and there is even a handicapped accessible portable toilet!\n\nThere are benches and picnic tables and the breeze is spectacularly refreshing. Some good shade too!
## 2 I’m a photographer and i absolutely love having sessions here. This place is huge with so much grounds to explore!\n\nFree parking\n\nApproach entrance slowly as it is so easy to miss.\n\nNote the park doesnt actually close at 4, it closes at dusk!\n\nFollow my photography on IG @lnportraits.co
## 3 This park is such a hidden gem and it’s great for a picnic day with friends or just a nice walk to clear your head.
## 4 Many mosquitoes I recommend not wearing short clothing, please do not bring your pets here after a walk with my dog when I returned home I found 12 ticks to be exact.
## 5 This is a beautiful ecosystem. It's so hard finding a place like this in Boston. Matter of fact, there are no other places like this in Boston. The silence, the smell of nature, the hints and peeks of wildlife around you, it's all just a wonderful escape from the hustle and bustle of the city. It's been a long time since I've been in a place that's so alive. Parks and forests lack the biodiversity and fauna that the marsh biome provides. Make yourself a favor and take some time to disconnect and explore this site. Make sure to go into the meadow, too! (That's where the wildlife is)
## originalText.languageCode authorAttribution.displayName
## 1 en Kristy Johnson
## 2 en Leah T.
## 3 en Xabiba ladan
## 4 en Liz Arroyo
## 5 en Francisco Verón Ferreira
## authorAttribution.uri
## 1 https://www.google.com/maps/contrib/116090463145785638796/reviews
## 2 https://www.google.com/maps/contrib/105877196373829142845/reviews
## 3 https://www.google.com/maps/contrib/112675678493311617883/reviews
## 4 https://www.google.com/maps/contrib/111403679719931559592/reviews
## 5 https://www.google.com/maps/contrib/110198590269583947181/reviews
## authorAttribution.photoUri
## 1 https://lh3.googleusercontent.com/a-/ALV-UjX0mFaowRCN0U3WhbIUa0ZYRWA8aggKA7B_z6SBlujY08-6cSTH=s128-c0x00000000-cc-rp-mo-ba7
## 2 https://lh3.googleusercontent.com/a-/ALV-UjWJ02ZXBpyJlxgZAIUKeUjEcpwX64K5BJ-63FW-fDjnBgZbH0AsKg=s128-c0x00000000-cc-rp-mo-ba5
## 3 https://lh3.googleusercontent.com/a-/ALV-UjUIEzxZQAX-lILNAnNvvZ67zx69PjLswlWmvPmsOM9LikBdvlOfDQ=s128-c0x00000000-cc-rp-mo-ba2
## 4 https://lh3.googleusercontent.com/a-/ALV-UjXXuXe5gHNL3_4r2SiCy4PB310dpRy-fn6wxTAPpH4f7dnAg6j5aQ=s128-c0x00000000-cc-rp-mo
## 5 https://lh3.googleusercontent.com/a-/ALV-UjX-ntpBwILTTdPaoE08inoccLWJMOV0jjUTSfSlzBXh5eHqN2O3qQ=s128-c0x00000000-cc-rp-mo-ba5
# Build a separate review-level table keyed by place ID:
# unnest_longer() gives one row per review, then unnest_wider() spreads the
# review fields into their own columns
hiking_reviews <- hiking_flat %>%
  select(places.id, places.reviews) %>%
  unnest_longer(places.reviews) %>%
  unnest_wider(places.reviews)
# The reviews now live in hiking_reviews, so drop the list-column from the
# POI-level table
hiking_flat <- select(hiking_flat, -places.reviews)
# 3. Handle missing values
# Tally the missing values in every column
map_dbl(hiking_flat, function(column) sum(is.na(column)))
## places.id
## 0
## places.types
## 0
## places.formattedAddress
## 0
## places.rating
## 2
## places.businessStatus
## 0
## places.userRatingCount
## 2
## places.primaryType
## 0
## places.location.latitude
## 0
## places.location.longitude
## 0
## places.displayName.text
## 0
## places.displayName.languageCode
## 0
## places.reviewSummary.flagContentUri
## 9
## places.reviewSummary.reviewsUri
## 9
## places.reviewSummary.text.text
## 9
## places.reviewSummary.text.languageCode
## 9
## places.reviewSummary.disclosureText.text
## 9
## places.reviewSummary.disclosureText.languageCode
## 9
# Drop rows that have missing values in places.rating
hiking_dropna <- hiking_flat %>%
  drop_na(places.rating)
# Row count going into the filter
print(paste0("Before: ", nrow(hiking_flat)))
## [1] "Before: 10"
# Row count once the rows with a missing rating are removed
print(paste0("After: ", nrow(hiking_dropna)))
## [1] "After: 8"
# 4. Filter by location
# Turn the cleaned hiking table into point features built from the
# longitude/latitude columns
hiking_sf <- st_as_sf(
  hiking_dropna,
  coords = c("places.location.longitude", "places.location.latitude"),
  crs = 4326
)
# Spatial subset: keep only the hiking areas that intersect the city boundary
hiking_sf_in <- hiking_sf[boston, ]
print(paste0("Before: ", nrow(hiking_sf)))
## [1] "Before: 8"
print(paste0("After: ", nrow(hiking_sf_in)))
## [1] "After: 8"
# Compare the raw (messy) data with the cleaned (tidy) data: number of rows
# and number of columns before vs. after cleaning
glue::glue("number of rows before: {nrow(hiking_area)} -> after: {nrow(hiking_sf_in)} \n
number of columns before: {ncol(hiking_area)} -> after: {ncol(hiking_sf_in)} \n")
## number of rows before: 31 -> after: 8
##
## number of columns before: 18 -> after: 16
Print the first 10 rows of your final dataset using either
print()
or kableExtra::kable()
.
# Print the head of the final Asian grocery store dataset
# NOTE(review): the task asks for the first 10 rows, but only 5 are requested
# here; confirm whether n should be 10
print(head(grocery_sf_in, n = 5))
## Simple feature collection with 5 features and 15 fields
## Geometry type: POINT
## Dimension: XY
## Bounding box: xmin: -71.06564 ymin: 42.34422 xmax: -71.06013 ymax: 42.35149
## Geodetic CRS: WGS 84
## places.id
## 1 ChIJt8RzSHh644kRLJD0BSpMMzs
## 2 ChIJs8yo23F644kRScIIr53rSt0
## 3 ChIJz9ng2HF644kRBdSDKVCXncM
## 4 ChIJ8SuAFXh644kRPe0BKddE8uY
## 5 ChIJzwcTqXF644kRnLFDPZEwxTQ
## places.types
## 1 market,asian_grocery_store,grocery_store,food_store,store,food,point_of_interest,establishment
## 2 asian_grocery_store,supermarket,grocery_store,food_store,store,food,point_of_interest,establishment
## 3 asian_grocery_store,grocery_store,food_store,store,food,point_of_interest,establishment
## 4 store,asian_grocery_store,grocery_store,food_store,food,point_of_interest,establishment
## 5 supermarket,asian_grocery_store,grocery_store,food_store,store,food,point_of_interest,establishment
## places.formattedAddress places.rating
## 1 11 Hudson St Unit #A, Boston, MA 02111, USA 4.1
## 2 1102 Washington St, Boston, MA 02118, USA 4.0
## 3 1102 Washington St, Boston, MA 02118, USA 4.5
## 4 34 Oxford St, Boston, MA 02111, USA 3.5
## 5 50 Herald St, Boston, MA 02118, USA 3.9
## places.businessStatus places.userRatingCount places.primaryType
## 1 OPERATIONAL 20 market
## 2 OPERATIONAL 801 asian_grocery_store
## 3 OPERATIONAL 4 asian_grocery_store
## 4 OPERATIONAL 4 store
## 5 OPERATIONAL 1058 supermarket
## places.displayName.text places.displayName.languageCode
## 1 Happy Family Food Market en
## 2 New Ming en
## 3 New York Marts en
## 4 Delight Corner en
## 5 C-Mart Supermarket en
## places.reviewSummary.flagContentUri
## 1 <NA>
## 2 https://www.google.com/local/review/rap/report?postId=5%401:CAIQACodChtyc19oOlQyTVMxZW9FNlFfZ3htZXJXSS1RSFE%7CCAIQACorChtyc19oOlQyTVMxZW9FNlFfZ3htZXJXSS1RSFESDAi_od_FBhCv-Z2VAQ&d=17924085&t=8
## 3 <NA>
## 4 <NA>
## 5 <NA>
## places.reviewSummary.reviewsUri
## 1 <NA>
## 2 https://www.google.com/maps/place//data=!4m4!3m3!1s0x89e37a71dba8ccb3:0xdd4aeb9daf08c249!9m1!1b1
## 3 <NA>
## 4 <NA>
## 5 <NA>
## places.reviewSummary.text.text
## 1 <NA>
## 2 People say this Asian grocery store offers a wide variety of fresh vegetables, seafood, and other Asian foods. They also highlight the reasonable prices and spacious, comfortable shopping environment.\n\nOther reviews mention the selection can be overwhelming.
## 3 <NA>
## 4 <NA>
## 5 <NA>
## places.reviewSummary.text.languageCode
## 1 <NA>
## 2 en-US
## 3 <NA>
## 4 <NA>
## 5 <NA>
## places.reviewSummary.disclosureText.text
## 1 <NA>
## 2 Summarized with Gemini
## 3 <NA>
## 4 <NA>
## 5 <NA>
## places.reviewSummary.disclosureText.languageCode geometry
## 1 <NA> POINT (-71.06018 42.35086)
## 2 en-US POINT (-71.06563 42.34423)
## 3 <NA> POINT (-71.06564 42.34422)
## 4 <NA> POINT (-71.06013 42.35149)
## 5 <NA> POINT (-71.06503 42.34636)
# Print the head of the final hiking area dataset
# NOTE(review): the task asks for the first 10 rows, but only 5 are requested
# here; confirm whether n should be 10
print(head(hiking_sf_in, n = 5))
## Simple feature collection with 5 features and 15 fields
## Geometry type: POINT
## Dimension: XY
## Bounding box: xmin: -71.06374 ymin: 42.31083 xmax: -70.99343 ymax: 42.3915
## Geodetic CRS: WGS 84
## places.id
## 1 ChIJPyGMcS9u44kRja36dM05hEQ
## 2 ChIJY5uF9FZx44kR7FEMLvXFKPI
## 3 ChIJA6R6GQBx44kR__ctbeCZJKI
## 4 ChIJJRJ0dU1744kRePYkWdZE8y0
## 5 ChIJeQ7A1FJx44kRYP7JFjcpOH8
## places.types
## 1 tourist_attraction,hiking_area,state_park,park,sports_activity_location,point_of_interest,establishment
## 2 hiking_area,park,sports_activity_location,point_of_interest,establishment
## 3 hiking_area,park,sports_activity_location,point_of_interest,establishment
## 4 hiking_area,park,sports_activity_location,point_of_interest,establishment
## 5 hiking_area,park,sports_activity_location,point_of_interest,establishment
## places.formattedAddress places.rating
## 1 1399 Bennington St, Boston, MA 02128, USA 4.5
## 2 E Boston Greenway Connector, Boston, MA 02128, USA 5.0
## 3 Pier 4 Blvd., Boston, MA 02210, USA 5.0
## 4 Boston, MA 02125, USA 5.0
## 5 Boston, MA 02113, USA 4.8
## places.businessStatus places.userRatingCount places.primaryType
## 1 OPERATIONAL 554 park
## 2 OPERATIONAL 3 hiking_area
## 3 OPERATIONAL 8 hiking_area
## 4 OPERATIONAL 2 hiking_area
## 5 OPERATIONAL 39 hiking_area
## places.displayName.text places.displayName.languageCode
## 1 Belle Isle Marsh Reservation en
## 2 East Boston Greenway Trailhead en
## 3 Viewpoint pl
## 4 Hilltop at Savin Hill Park en
## 5 Freedom Trail en
## places.reviewSummary.flagContentUri places.reviewSummary.reviewsUri
## 1 <NA> <NA>
## 2 <NA> <NA>
## 3 <NA> <NA>
## 4 <NA> <NA>
## 5 <NA> <NA>
## places.reviewSummary.text.text places.reviewSummary.text.languageCode
## 1 <NA> <NA>
## 2 <NA> <NA>
## 3 <NA> <NA>
## 4 <NA> <NA>
## 5 <NA> <NA>
## places.reviewSummary.disclosureText.text
## 1 <NA>
## 2 <NA>
## 3 <NA>
## 4 <NA>
## 5 <NA>
## places.reviewSummary.disclosureText.languageCode geometry
## 1 <NA> POINT (-70.99343 42.3915)
## 2 <NA> POINT (-71.01113 42.38425)
## 3 <NA> POINT (-71.0411 42.35365)
## 4 <NA> POINT (-71.04961 42.31083)
## 5 <NA> POINT (-71.06374 42.35539)
Write about at least four interesting observations you discovered (maximum 200 words). Include plots or maps if helpful. Example questions you might explore include:
What are the most noticeable differences between the two POI types?
What is the average rating score? Does it seem related to the number of ratings?
Is there an association between price level and rating score?
Is there any connection between POI rating scores and household income?
Do POIs tend to cluster in specific neighborhoods, or are they spread evenly across the city?
If you had to choose one POI to visit based on the dataset, which would you pick and why?
Note: The questions above are only examples–feel free to be creative!
Response: The total word count of observations is 188 (excluding the subheading).
As shown in the following maps, Asian grocery stores are clustered in certain urban areas, such as Chinatown, while hiking areas are primarily distributed within green spaces.
# Interactive map of the Asian grocery stores inside the Boston boundary.
# Each store is one dot: fill encodes the rating, size the number of ratings.
tmap_mode("view")
tm_shape(boston) +
  tm_borders() +
  tm_shape(grocery_sf_in) +
  tm_dots(
    fill = "places.rating",
    fill.scale = tm_scale_continuous(values = "magma"),
    size = "places.userRatingCount",
    shape = 21,
    col = "black",
    lwd = 1,
    # Fields shown in the popup when a dot is clicked
    popup.vars = c(
      "Name" = "places.displayName.text",
      "Rating" = "places.rating",
      "Rating Count" = "places.userRatingCount"
    )
  )
# Interactive map of the hiking areas inside the Boston boundary.
# Each site is one dot: fill encodes the rating, size the number of ratings.
tmap_mode("view")
tm_shape(boston) +
  tm_borders() +
  tm_shape(hiking_sf_in) +
  tm_dots(
    fill = "places.rating",
    fill.scale = tm_scale_continuous(values = "cividis"),
    size = "places.userRatingCount",
    shape = 21,
    col = "black",
    lwd = 1,
    # Fields shown in the popup when a dot is clicked
    popup.vars = c(
      "Name" = "places.displayName.text",
      "Rating" = "places.rating",
      "Rating Count" = "places.userRatingCount"
    )
  )
As shown in the following map, among all hiking areas, only two locations have more than 200 reviews. Notably, the site located closer to downtown (Boston Common) has nearly twice as many reviews as the suburban site, indicating a stronger concentration of user activity in the urban core.
# Label each hiking area by its number of ratings: "many" when it has more
# than 200, "few" otherwise. Only the count and the label are kept.
hiking_sf_in_binary <- hiking_sf_in %>%
  mutate(review_count_binary = case_when(
    places.userRatingCount > 200 ~ "many",
    places.userRatingCount <= 200 ~ "few"
  )) %>%
  select(places.userRatingCount, review_count_binary)
# Map the two groups with distinct symbol shapes and colours, using the label
# for both aesthetics.
# NOTE(review): `shapes`, `palette`, `title.col`, `title.shape` and
# `legend.outside` look like tmap v3-style arguments, while the rating maps in
# this file use the v4 `fill.scale` interface; confirm they behave as
# intended under the installed tmap version.
tm_shape(boston) +
  tm_borders() +
  tm_shape(hiking_sf_in_binary) +
  tm_symbols(
    col = "review_count_binary",
    shape = "review_count_binary",
    shapes = c("many" = 25, "few" = 19),
    palette = c("many" = "tomato", "few" = "grey"),
    title.col = "Review Count",
    title.shape = "Review Count"
  ) +
  tm_layout(legend.outside = TRUE)
The results show that the overall quality of hiking areas is high, with an average rating of 4.8 and a weighted average of 4.69. This indicates that most sites are well regarded, but the slight drop in the weighted score highlights the importance of quality improvements at popular sites, since they influence overall perception the most.
# Unweighted mean of the hiking area ratings
cat(
  "Normal average rating:",
  mean(hiking_sf_in$places.rating, na.rm = TRUE)
)
## Normal average rating: 4.8
# Mean rating weighted by each site's number of ratings:
# sum(rating * count) / sum(count)
cat(
  "Weighted average rating:",
  sum(
    hiking_sf_in$places.rating * hiking_sf_in$places.userRatingCount,
    na.rm = TRUE
  ) /
    sum(hiking_sf_in$places.userRatingCount, na.rm = TRUE)
)
## Weighted average rating: 4.693316
As shown in the histogram and scatter plot, Asian grocery stores generally maintain ratings between 3.50 and 4.50. However, review counts differ substantially: while some stores with mid-to-high ratings attract hundreds to over a thousand reviews, others with similar ratings receive very few. This indicates that customer volume is influenced by factors beyond rating alone.
# Distribution of the grocery store ratings
hist(grocery_sf_in$places.rating)
# Rating (x) against number of ratings (y), one point per store
ggplot(data = grocery_sf_in) +
  geom_point(
    mapping = aes(x = places.rating, y = places.userRatingCount),
    color = "blue",
    size = 3
  ) +
  theme_minimal() +
  labs(title = "Scatter Plot", x = "Rating", y = "userRatingCount")