The goal of this document is to review some mapping strategies in R, and ultimately decide how I want to map Nice Ride data in Minneapolis.
library(tidyverse)
library(leaflet)
library(sf)
library(albersusa)
library(readxl)
food <- read_sf("Data/Shape/Food_Inspections.shp")
First, I’ll replicate some of the food analysis for a refresher.
CapStr <- function(y) {
c <- strsplit(y, " ")[[1]]
paste(toupper(substring(c, 1,1)), substring(c, 2),
sep="", collapse=" ")
}
inspections_map <- food %>%
select(BusinessNa, FullAddres, Inspection, DateOfInsp, Neighborho, Inspecti_3, Latitude, Longitude, geometry) %>%
rename(Business= BusinessNa,
Address= FullAddres,
NBHD= Neighborho,
Score= Inspecti_3)
inspections_map <- inspections_map %>%
filter(Inspection== "Routine",
NBHD != "NA") %>%
separate(DateOfInsp, c("Date", "Time"), "T") %>%
distinct(Business, Address, Date,.keep_all = TRUE) %>%
group_by(Business) %>%
arrange(desc(Date)) %>%
slice(which.max(as.Date(Date, '%Y-%m-%d'))) %>%
ungroup()
inspections_map$Business<- tolower(inspections_map$Business)
inspections_map$Business<- sapply(inspections_map$Business, CapStr)
inspections_map <- inspections_map %>%
group_by(NBHD)%>%
mutate(nbhd_score= mean(Score, na.rm = TRUE)) %>%
ungroup()
inspect<- inspections_map %>%
select(NBHD, nbhd_score) %>%
st_set_geometry(NULL)
Now add some neighborhood shapefiles.
neighborhoods <- read_sf("Data/Shape/Minneapolis_Neighborhoods.shp")
nbhd_map <- neighborhoods %>%
select(BDNAME, Shape_STAr, Shape_STLe, geometry) %>%
rename(NBHD= BDNAME) %>%
left_join(inspect, by="NBHD")
nbhd_map <- unique(nbhd_map)
#quantile(nbhd_map$nbhd_score, probs= seq(0,1,0.25))
nbhd2_pal= c("#f0f9e8", "#bae4bc", "#7bccc4", "#2b8cbe")
nbhd_bins= c(86, 93.9, 95.1, 96.4, 100)
nbhd_pal <- colorBin(palette = nbhd2_pal, domain = nbhd_map$nbhd_score, bins=round(nbhd_bins, digits = 1))
#quantile(inspections_map$Score, probs= seq(0,1,0.25))
rest2_pal= c("#ae017e", "#fbb4b9")
rest_bins= c(54, 90, 100)
rest_pal <- colorBin(palette = rest2_pal, domain = nbhd_map$nbhd_score, bins=round(rest_bins, digits = 1))
Now make a map. Can also remove the neighborhood average for a better look at where the dots are. Can use addCircleMarkers to add the docking stations, most likely.
leaflet(width = "100%") %>%
setView(-93.265015, 44.977753, zoom=11) %>%
addProviderTiles(providers$Esri.WorldGrayCanvas) %>%
addPolygons(data= nbhd_map,
group= "nbhd",
popup = paste("<strong>Neighborhood:</strong>", nbhd_map$NBHD, "<br>",
"<strong>Avg. Inspection Score:</strong>", round(nbhd_map$nbhd_score, digits=1)),
stroke = TRUE,
smoothFactor = 1.5,
weight = 2.5,
fillOpacity = 0.9,
opacity = 0.7,
color = "black",
fillColor = ~ nbhd_pal(nbhd_score)) %>%
addCircleMarkers(data= inspections_map,
popup = paste("<strong>Business:</strong>", inspections_map$Business, "<br>",
"<strong>Neighborhood:</strong>", inspections_map$NBHD, "<br>",
"<strong>Score:</strong>", inspections_map$Score),
radius=3,
stroke = FALSE,
fillOpacity = 0.9,
opacity = 0.7,
fillColor = rest_pal(inspections_map$Score)) #%>%
#addLegend(values = ~ nbhd_map$nbhd_score,
# group="nbhd",
# position = "bottomright",
# pal = rest_pal,
# title = "Average Score",
# opacity = 1)
Based on how the previous map looks, I won’t want to have overlay shapefiles. Rather, it’s better to rely on some base maps available straight from ESRI/Leaflet. Anyway, first I would like to test out a few different shapefiles, all from the Census Bureau. The previous neighborhood shapefile was from MN Open GIS I think.
Now, we can test out what a couple different base tiles look like. There is a good link here that covers some of them. You can use names(providers) to see all maps.
map <- leaflet(width = "100%") %>%
setView(-93.265015, 44.977753, zoom=11)
#base
map %>%
addTiles()
map %>%
addProviderTiles(providers$Stamen.Toner)
map %>%
addProviderTiles(providers$CartoDB.Positron)
#map %>%
#addProviderTiles(providers$CartoDB.Positron) %>%
#addProviderTiles(providers$Jawg.Terrain)
#map %>%
#addProviderTiles(providers$Stamen.WaterColor)
#map %>%
#addProviderTiles(providers$ESRI.WorldImagery)
#map %>%
#addProviderTiles(providers$ESRI.NatGeoWorldMap)
#map %>%
#addProviderTiles(providers$NASAGIBS.ViirsEarthAtNight2012)
#map %>%
#addProviderTiles(providers$JusticeMap)
#map %>%
#addProviderTiles(providers$HikeBike)
#Thunderforest.OpenCycleMap
#USGS.USTopo
#USGS.USImagery
You can also layer tiles. This could be useful for putting a mostly transparent layer over another map
map %>%
addProviderTiles(providers$MtbMap) %>%
addProviderTiles(providers$Stamen.TonerLines,
options = providerTileOptions(opacity = 0.35)) %>%
addProviderTiles(providers$Stamen.TonerLabels)
https://leaflet-extras.github.io/leaflet-providers/preview/
library(janitor)
## Warning: package 'janitor' was built under R version 3.6.3
##
## Attaching package: 'janitor'
## The following objects are masked from 'package:stats':
##
## chisq.test, fisher.test
tripdata <- read_csv("Data/202008-niceride-tripdata.csv")
##
## -- Column specification -----------------------------------------------------------------------------
## cols(
## ride_id = col_character(),
## rideable_type = col_character(),
## started_at = col_datetime(format = ""),
## ended_at = col_datetime(format = ""),
## start_station_name = col_character(),
## start_station_id = col_double(),
## end_station_name = col_character(),
## end_station_id = col_double(),
## start_lat = col_double(),
## start_lng = col_double(),
## end_lat = col_double(),
## end_lng = col_double(),
## member_casual = col_character()
## )
distribution_data <- read_xlsx("Data/Nice-Ride-Bike-Distribution-Data.xlsx")
#Scooter data is also available here: #Scooter data is also available here: https://opendata.minneapolismn.gov/search?groupIds=9a38073875844f16a93416bf6bd7df5b&q=scooter7777777
stations <- tripdata %>%
select(start_station_name, start_lat, start_lng) %>%
filter(!is.na(start_station_name)) %>%
distinct()
#this doesn't work. there are small long/lat difs i.e. the code below
#could either round to a less precise point, or manage to set all of them as equal
#easiest might be to group by station and take mean, na.rm=TRUE
stations %>% filter(start_station_name=="McNamara Center") %>% View()
#1: Rounding
stations_round <- stations %>%
mutate(start_lat= round(start_lat, digits=3),
start_lng= round(start_lng, digits=3)) %>%
distinct()
stations_round %>%
#select() %>%
get_dupes(start_station_name)
## # A tibble: 186 x 4
## start_station_name dupe_count start_lat start_lng
## <chr> <int> <dbl> <dbl>
## 1 15th Ave SE & Como Ave SE 3 45.0 -93.2
## 2 15th Ave SE & Como Ave SE 3 45.0 -93.2
## 3 15th Ave SE & Como Ave SE 3 45.0 -93.2
## 4 18th Ave Trail & 2nd Street NE 2 45.0 -93.3
## 5 18th Ave Trail & 2nd Street NE 2 45.0 -93.3
## 6 23rd Ave SE & University Ave SE 2 45.0 -93.2
## 7 23rd Ave SE & University Ave SE 2 45.0 -93.2
## 8 26th Street & Hennepin 2 45.0 -93.3
## 9 26th Street & Hennepin 2 45.0 -93.3
## 10 26th Street & Lyndale 3 45.0 -93.3
## # ... with 176 more rows
#2. Mean
stations_mean <- stations %>%
group_by(start_station_name) %>%
summarise(
start_lat= mean(start_lat),
start_lng= mean(start_lng))
## `summarise()` ungrouping output (override with `.groups` argument)
# this should work best.
#Now, let's create a final frame that has a count of trips taken from each place
#when mapping, we may drop stations below a certain number
stations_final <- tripdata %>%
filter(!is.na(start_station_name)) %>%
group_by(start_station_name) %>%
summarise(
trips= n(),
lat= mean(start_lat),
lng= mean(start_lng)) %>%
arrange(desc(trips)) %>%
filter(trips >= 50) #only includes stations with atleast 50 trips
## `summarise()` ungrouping output (override with `.groups` argument)
map %>%
addProviderTiles(providers$CartoDB.Positron)
map %>%
addProviderTiles(providers$CartoDB.Positron) %>%
addCircleMarkers(data= stations_final,
#popup = paste("<strong>Business:</strong>", inspections_map$Business, "<br>",
# "<strong>Neighborhood:</strong>", inspections_map$NBHD, "<br>",
# "<strong>Score:</strong>", inspections_map$Score),
radius=log(stations_final$trips),
stroke = FALSE,
fillOpacity = 0.9,
opacity = 0.7#,
#fillColor = rest_pal(inspections_map$Score)
)
## Assuming "lng" and "lat" are longitude and latitude, respectively
#figure out how to draw lines between