library(mosaicData)
library(tidyverse)
#head(HELPrct)
HELPrct %>% select(age, cesd) %>% write_csv("HELPrct.csv")
p <- ggplot(HELPrct, aes(x = age, y = cesd)) +
geom_point() +
theme_classic() +
stat_smooth(method = "loess", formula = y ~ x, size = 2)
ggExtra::ggMarginal(p, type = "histogram", binwidth = 3)
Here is the link to my HELPrct Dashboard
Ch17 Problem 1 (Easy): Use the geocode function from the tidygeocoder package to find the latitude and longitude of the Emily Dickinson Museum in Amherst, Massachusetts.
Address of Emily Dickinson Museum in Amherst, Massachusetts: 280 Main St, Amherst, MA
library(tidygeocoder)
emily_dickinson_museum <- tibble(
address = " 280 Main St, Amherst, MA") %>%
tidygeocoder::geocode(address, method = "osm")
## Passing 1 address to the Nominatim single address geocoder
## Query completed in: 1 seconds
emily_dickinson_museum
## # A tibble: 1 × 3
## address lat long
## <chr> <dbl> <dbl>
## 1 " 280 Main St, Amherst, MA" 42.4 -72.5
library(leaflet)
emily_dickinson_museum_map <- leaflet() %>%
addTiles() %>%
addMarkers(data = emily_dickinson_museum)
## Assuming "long" and "lat" are longitude and latitude, respectively
emily_dickinson_museum_map
library(mdsr)
library(tidygeocoder)
library(sf)
## Linking to GEOS 3.10.2, GDAL 3.4.2, PROJ 8.2.1; sf_use_s2() is TRUE
head(Violations)
## # A tibble: 6 × 16
## camis dba boro build…¹ street zipcode phone inspection_date action
## <int> <chr> <chr> <int> <chr> <int> <dbl> <dttm> <chr>
## 1 30075445 MORRI… BRONX 1007 MORRI… 10462 7.19e9 2015-02-09 00:00:00 Viola…
## 2 30075445 MORRI… BRONX 1007 MORRI… 10462 7.19e9 2014-03-03 00:00:00 Viola…
## 3 30075445 MORRI… BRONX 1007 MORRI… 10462 7.19e9 2013-10-10 00:00:00 No vi…
## 4 30075445 MORRI… BRONX 1007 MORRI… 10462 7.19e9 2013-09-11 00:00:00 Viola…
## 5 30075445 MORRI… BRONX 1007 MORRI… 10462 7.19e9 2013-09-11 00:00:00 Viola…
## 6 30075445 MORRI… BRONX 1007 MORRI… 10462 7.19e9 2013-08-14 00:00:00 Viola…
## # … with 7 more variables: violation_code <chr>, score <int>, grade <chr>,
## # grade_date <dttm>, record_date <dttm>, inspection_type <chr>,
## # cuisine_code <dbl>, and abbreviated variable name ¹​building
dim(Violations)
## [1] 480621 16
x <- Violations %>% select(street, zipcode) %>%
unite(loc, street, zipcode, sep = ", ") %>%
unique()
dim(x)
## [1] 5306 1
x_loc <- x %>% geocode(loc, method = "osm") %>%
st_as_sf(coords = c("long", "lat")) %>%
st_set_crs(4326)
y_bind_col(x, x_loc)
y_sep <- y %>% separate(col = loc, c("street", "zipcode"), sep = "_") %>%
mutate(zipcode = as.integer(zipcode))
Violations_loaction <- read_csv("Violations_loc.csv")
## Rows: 480621 Columns: 18
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (7): dba, boro, street, action, violation_code, grade, inspection_type
## dbl (8): camis, building, zipcode, phone, score, cuisine_code, lon, lat
## dttm (3): inspection_date, grade_date, record_date
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
head(Violations_loaction)
## # A tibble: 6 × 18
## camis dba boro build…¹ street zipcode phone inspection_date action
## <dbl> <chr> <chr> <dbl> <chr> <dbl> <dbl> <dttm> <chr>
## 1 30075445 MORRI… BRONX 1007 MORRI… 10462 7.19e9 2015-02-09 00:00:00 Viola…
## 2 30075445 MORRI… BRONX 1007 MORRI… 10462 7.19e9 2014-03-03 00:00:00 Viola…
## 3 30075445 MORRI… BRONX 1007 MORRI… 10462 7.19e9 2013-10-10 00:00:00 No vi…
## 4 30075445 MORRI… BRONX 1007 MORRI… 10462 7.19e9 2013-09-11 00:00:00 Viola…
## 5 30075445 MORRI… BRONX 1007 MORRI… 10462 7.19e9 2013-09-11 00:00:00 Viola…
## 6 30075445 MORRI… BRONX 1007 MORRI… 10462 7.19e9 2013-08-14 00:00:00 Viola…
## # … with 9 more variables: violation_code <chr>, score <dbl>, grade <chr>,
## # grade_date <dttm>, record_date <dttm>, inspection_type <chr>,
## # cuisine_code <dbl>, lon <dbl>, lat <dbl>, and abbreviated variable name
## # ¹​building
Violations_loaction_clean <- Violations_loaction %>% drop_na() %>%
select(street, zipcode, lon, lat) %>%
unite(loc, street, zipcode, sep = ", ") %>%
unique() %>%
mutate(longitude = lon, latitude = lat) %>%
select(loc, latitude, longitude)
dim(Violations_loaction_clean)
## [1] 4920 3
head(Violations_loaction_clean)
## # A tibble: 6 × 3
## loc latitude longitude
## <chr> <dbl> <dbl>
## 1 MORRIS PARK AVE, 10462 40.8 -73.9
## 2 FLATBUSH AVENUE, 11225 40.7 -74.0
## 3 WEST 57 STREET, 10019 40.8 -74.0
## 4 STILLWELL AVENUE, 11224 40.6 -74.0
## 5 ASTORIA BOULEVARD, 11369 40.8 -73.9
## 6 11 AVENUE, 11219 40.6 -74.0
# filter out longtitude and latitude outside of NYC
Violations_loaction_clean <- Violations_loaction_clean %>%
filter(longitude > -74 & longitude < -71)
dim(Violations_loaction_clean)
## [1] 4109 3
#Violations_loaction_clean
set.seed(12345)
Violations_loaction_clean_sf <- st_as_sf(x = Violations_loaction_clean,
coords = c("longitude","latitude"),
crs = 4326) #%>% st_transform(2263)
# check crs
st_crs(Violations_loaction_clean_sf)
## Coordinate Reference System:
## User input: EPSG:4326
## wkt:
## GEOGCRS["WGS 84",
## ENSEMBLE["World Geodetic System 1984 ensemble",
## MEMBER["World Geodetic System 1984 (Transit)"],
## MEMBER["World Geodetic System 1984 (G730)"],
## MEMBER["World Geodetic System 1984 (G873)"],
## MEMBER["World Geodetic System 1984 (G1150)"],
## MEMBER["World Geodetic System 1984 (G1674)"],
## MEMBER["World Geodetic System 1984 (G1762)"],
## MEMBER["World Geodetic System 1984 (G2139)"],
## ELLIPSOID["WGS 84",6378137,298.257223563,
## LENGTHUNIT["metre",1]],
## ENSEMBLEACCURACY[2.0]],
## PRIMEM["Greenwich",0,
## ANGLEUNIT["degree",0.0174532925199433]],
## CS[ellipsoidal,2],
## AXIS["geodetic latitude (Lat)",north,
## ORDER[1],
## ANGLEUNIT["degree",0.0174532925199433]],
## AXIS["geodetic longitude (Lon)",east,
## ORDER[2],
## ANGLEUNIT["degree",0.0174532925199433]],
## USAGE[
## SCOPE["Horizontal component of 3D system."],
## AREA["World."],
## BBOX[-90,-180,90,180]],
## ID["EPSG",4326]]
# preview
head(Violations_loaction_clean_sf)
## Simple feature collection with 6 features and 1 field
## Geometry type: POINT
## Dimension: XY
## Bounding box: xmin: -73.98414 ymin: 40.5906 xmax: -73.85992 ymax: 40.84652
## Geodetic CRS: WGS 84
## # A tibble: 6 × 2
## loc geometry
## <chr> <POINT [°]>
## 1 MORRIS PARK AVE, 10462 (-73.85992 40.84652)
## 2 FLATBUSH AVENUE, 11225 (-73.96283 40.66365)
## 3 WEST 57 STREET, 10019 (-73.98414 40.76727)
## 4 STILLWELL AVENUE, 11224 (-73.98409 40.5906)
## 5 ASTORIA BOULEVARD, 11369 (-73.87271 40.76249)
## 6 NOSTRAND AVENUE, 11226 (-73.94857 40.64142)
# install.packages("remotes")
remotes::install_github("mfherman/nycgeo")
## Skipping install of 'nycgeo' from a github remote, the SHA1 (4fee55c1) has not changed since last install.
## Use `force = TRUE` to force installation
library(nycgeo)
# create geography data frame
all_nyc_boroughs <- nyc_boundaries(geography = "borough") %>%
st_transform(4326)
## old-style crs object detected; please recreate object with a recent sf::st_crs()
boundary_plot <- ggplot(all_nyc_boroughs) +
geom_sf() +
scale_x_continuous(breaks = c(-72.677, -72.683))
boundary_plot
# randomly Sample 10 rows of data with dplyr
data_s1 <- sample_n(Violations_loaction_clean_sf, 10)
data_s1
## Simple feature collection with 10 features and 1 field
## Geometry type: POINT
## Dimension: XY
## Bounding box: xmin: -73.98785 ymin: 40.5847 xmax: -73.77814 ymax: 40.8324
## Geodetic CRS: WGS 84
## # A tibble: 10 × 2
## loc geometry
## * <chr> <POINT [°]>
## 1 LEXINGTON AVENUE, 10065 (-73.96544 40.76598)
## 2 YELLOWSTONE BOULEVARD, 11375 (-73.85206 40.72255)
## 3 ST NICHOLAS AVENUE, 10032 (-73.94088 40.8324)
## 4 IRVING AVE, 11237 (-73.91407 40.69884)
## 5 NOSTRAND AVENUE, 11225 (-73.94857 40.64142)
## 6 JFK INT AIRPORT, 11430 (-73.77814 40.64131)
## 7 SHORE PARKWAY, 11235 (-73.94803 40.5847)
## 8 18 AVENUE, 11204 (-73.98785 40.62143)
## 9 35 AVENUE, 11101 (-73.93011 40.75888)
## 10 70 STREET, 11377 (-73.89407 40.7336)
boundary_plot +
geom_sf(data = data_s1, color = "blue", size = 1.5)
# randomly Sample 50 rows of data with dplyr
data_s2 <- sample_n(Violations_loaction_clean_sf, 50)
boundary_plot +
geom_sf(data = data_s2, color = "blue", size = 1.5)
nyc_cd_plot <- ggplot() +
geom_sf(
data = nyc_cd,
color = "white",
lwd = 0.2,
aes(fill = borough_name),
alpha = 0.8) +
scale_fill_manual(values = c("lightblue", "deepskyblue4",
"blue",
"purple", "darkblue"),
guide = guide_legend(override.aes = list(
linetype = "blank",
shape = NA
))) +
labs(fill = "Borough",
title = "NYC Community Districts by Borough") +
theme_void() +
theme(plot.title.position = 'plot',
plot.title = element_text(hjust = 0.5)) +
theme(panel.grid = element_line(color = "transparent"))
nyc_cd_plot
# save plot
ggsave("nyc_cd_plot.png", plot = nyc_cd_plot)
# create plot
nyc_cd_restaurants_plot <- nyc_cd_plot +
geom_sf(
data = data_s1_sf,
aes(color = "GROW NYC Markets"),
alpha = 0.8,
show.legend = "point",
pch = 21,
color = "white",
fill = "black")
nyc_cd_restaurants_plot
# plot the random sample of 10 restaurant violations in New York City
leaflet() %>%
addTiles() %>%
addMarkers(data = data_s1)
# plot the random sample of 50 restaurant violations in New York City
leaflet() %>%
addTiles() %>%
addMarkers(data = data_s2)
library(macleish)
## Loading required package: etl
boundary <- macleish_layers %>%
pluck("boundary")
buildings <- macleish_layers %>%
pluck("buildings")
landmarks <- macleish_layers %>%
pluck("landmarks")
trails <- macleish_layers %>%
pluck("trails")
camp_sites <- macleish_layers %>%
pluck("camp_sites")
research <- macleish_layers %>%
pluck("research")
challenge_courses <- macleish_layers %>%
pluck("challenge_courses")
forests <- macleish_layers %>%
pluck("forests")
streams <- macleish_layers %>%
pluck("streams")
wetlands <- macleish_layers %>%
pluck("wetlands")
boundary_plot <- ggplot(boundary) +
geom_sf() +
scale_x_continuous(breaks = c(-72.677, -72.683))
boundary_plot +
geom_sf(data = forests, fill = "green", alpha = 0.1) +
geom_sf(data = streams, color = "light blue", size = 0.5) +
geom_sf(data = buildings, color = "dark grey", size = 1) +
geom_sf(data = landmarks, color = "grey", size = 1) +
geom_sf(data = trails, color = "dark green", size = 1) +
geom_sf(data = camp_sites, size = 4) + geom_sf_label(
data = camp_sites, aes(label = name),
nudge_y = 0.001
) +
geom_sf(data = research, color = "pink", size = 0.5) +
geom_sf(data = challenge_courses, color = "black", size = 0.5)
## Warning in st_point_on_surface.sfc(sf::st_zm(x)): st_point_on_surface may not
## give correct results for longitude/latitude data
forest_pal <- colorFactor("Greens", forests$type)
map <- leaflet() %>%
# Base groups
addTiles(group = "OpenStreetMap") %>%
addProviderTiles("Esri.WorldTopoMap", group = "Topography") %>%
addProviderTiles("Esri.WorldImagery", group = "Satellite") %>%
addProviderTiles("Stamen.TonerLite", group = "Toner Lite") %>%
# Boundaries
addPolygons(data = boundary, weight = 1, fillOpacity = 0.01, group = "Boundaries") %>%
# Man-Made structures
addPolygons(
data = buildings,
weight = 1, popup = ~name, group = "Structures"
) %>%
addMarkers(
data = landmarks,
popup = ~Label, group = "Structures"
) %>%
addPolylines(
data = trails,
weight = 1, color = "brown",
popup = ~name, group = "Structures"
) %>%
# Natural elements
addPolygons(
data = forests,
color = ~ forest_pal(type), weight = 0.1,
fillOpacity = 0.2,
popup = ~type, group = "Natural"
) %>%
addPolygons(
data = wetlands,
weight = 1, group = "Natural"
) %>%
addPolylines(
data = streams,
weight = 2, group = "Natural"
) %>%
# Layers control
addLayersControl(
baseGroups = c("OpenStreetMap", "Topography", "Satellite", "Toner Lite"),
overlayGroups = c("Boundaries", "Structures", "Natural"),
options = layersControlOptions(collapsed = FALSE)
)
map