Problem 1

library(mosaicData)
library(tidyverse)
#head(HELPrct)
HELPrct %>% select(age, cesd) %>% write_csv("HELPrct.csv")
p <- ggplot(HELPrct, aes(x = age, y = cesd)) +
  geom_point() +
  theme_classic() +
  stat_smooth(method = "loess", formula = y ~ x, size = 2)
ggExtra::ggMarginal(p, type = "histogram", binwidth = 3)

Here is the link to my HELPrct Dashboard

Problem 2

Ch17 Problem 1 (Easy): Use the geocode function from the tidygeocoder package to find the latitude and longitude of the Emily Dickinson Museum in Amherst, Massachusetts.

Address of Emily Dickinson Museum in Amherst, Massachusetts: 280 Main St, Amherst, MA

library(tidygeocoder)
emily_dickinson_museum <- tibble(
  address = " 280 Main St, Amherst, MA") %>%
  tidygeocoder::geocode(address, method = "osm")
## Passing 1 address to the Nominatim single address geocoder
## Query completed in: 1 seconds
emily_dickinson_museum
## # A tibble: 1 × 3
##   address                       lat  long
##   <chr>                       <dbl> <dbl>
## 1 " 280 Main St, Amherst, MA"  42.4 -72.5

I couldn’t knit the leaflet map to pdf, so I don’t evaluation this code chunck.

library(leaflet)
emily_dickinson_museum_map <- leaflet() %>% 
  addTiles() %>%
  addMarkers(data = emily_dickinson_museum)
## Assuming "long" and "lat" are longitude and latitude, respectively
emily_dickinson_museum_map

Problem 3

Ch 18: Problem 1 (Medium): The Violations data frame in the mdsr package contains information on violations noted in Board of Health inspections of New York City restaurants. These data contain spatial information in the form of addresses and zip codes.

a. Use the geocode function in tidygeocoder to obtain spatial coordinates for these restaurants.

library(mdsr)
library(tidygeocoder)
library(sf)
## Linking to GEOS 3.10.2, GDAL 3.4.2, PROJ 8.2.1; sf_use_s2() is TRUE
head(Violations)
## # A tibble: 6 × 16
##      camis dba    boro  build…¹ street zipcode  phone inspection_date     action
##      <int> <chr>  <chr>   <int> <chr>    <int>  <dbl> <dttm>              <chr> 
## 1 30075445 MORRI… BRONX    1007 MORRI…   10462 7.19e9 2015-02-09 00:00:00 Viola…
## 2 30075445 MORRI… BRONX    1007 MORRI…   10462 7.19e9 2014-03-03 00:00:00 Viola…
## 3 30075445 MORRI… BRONX    1007 MORRI…   10462 7.19e9 2013-10-10 00:00:00 No vi…
## 4 30075445 MORRI… BRONX    1007 MORRI…   10462 7.19e9 2013-09-11 00:00:00 Viola…
## 5 30075445 MORRI… BRONX    1007 MORRI…   10462 7.19e9 2013-09-11 00:00:00 Viola…
## 6 30075445 MORRI… BRONX    1007 MORRI…   10462 7.19e9 2013-08-14 00:00:00 Viola…
## # … with 7 more variables: violation_code <chr>, score <int>, grade <chr>,
## #   grade_date <dttm>, record_date <dttm>, inspection_type <chr>,
## #   cuisine_code <dbl>, and abbreviated variable name ¹​building
dim(Violations)
## [1] 480621     16
x <- Violations %>% select(street, zipcode) %>% 
  unite(loc, street, zipcode, sep = ", ") %>% 
  unique()
dim(x)
## [1] 5306    1
x_loc <- x %>% geocode(loc, method = "osm") %>%
  st_as_sf(coords = c("long", "lat")) %>%
  st_set_crs(4326)

y_bind_col(x, x_loc)

y_sep <- y %>% separate(col = loc, c("street", "zipcode"), sep = "_") %>% 
  mutate(zipcode = as.integer(zipcode))
Violations_loaction <- read_csv("Violations_loc.csv")
## Rows: 480621 Columns: 18
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (7): dba, boro, street, action, violation_code, grade, inspection_type
## dbl  (8): camis, building, zipcode, phone, score, cuisine_code, lon, lat
## dttm (3): inspection_date, grade_date, record_date
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
head(Violations_loaction)
## # A tibble: 6 × 18
##      camis dba    boro  build…¹ street zipcode  phone inspection_date     action
##      <dbl> <chr>  <chr>   <dbl> <chr>    <dbl>  <dbl> <dttm>              <chr> 
## 1 30075445 MORRI… BRONX    1007 MORRI…   10462 7.19e9 2015-02-09 00:00:00 Viola…
## 2 30075445 MORRI… BRONX    1007 MORRI…   10462 7.19e9 2014-03-03 00:00:00 Viola…
## 3 30075445 MORRI… BRONX    1007 MORRI…   10462 7.19e9 2013-10-10 00:00:00 No vi…
## 4 30075445 MORRI… BRONX    1007 MORRI…   10462 7.19e9 2013-09-11 00:00:00 Viola…
## 5 30075445 MORRI… BRONX    1007 MORRI…   10462 7.19e9 2013-09-11 00:00:00 Viola…
## 6 30075445 MORRI… BRONX    1007 MORRI…   10462 7.19e9 2013-08-14 00:00:00 Viola…
## # … with 9 more variables: violation_code <chr>, score <dbl>, grade <chr>,
## #   grade_date <dttm>, record_date <dttm>, inspection_type <chr>,
## #   cuisine_code <dbl>, lon <dbl>, lat <dbl>, and abbreviated variable name
## #   ¹​building

b. Using the spatial coordinates you obtained in the previous exercise, create an informative static map using ggspatial that illustrates the nature and extent of restaurant violations in New York City.

Violations_loaction_clean <- Violations_loaction %>% drop_na() %>% 
  select(street, zipcode, lon, lat) %>% 
  unite(loc, street, zipcode, sep = ", ") %>% 
  unique() %>% 
  mutate(longitude = lon, latitude = lat) %>% 
  select(loc, latitude, longitude)
dim(Violations_loaction_clean)
## [1] 4920    3
head(Violations_loaction_clean)
## # A tibble: 6 × 3
##   loc                      latitude longitude
##   <chr>                       <dbl>     <dbl>
## 1 MORRIS PARK AVE, 10462       40.8     -73.9
## 2 FLATBUSH AVENUE, 11225       40.7     -74.0
## 3 WEST   57 STREET, 10019      40.8     -74.0
## 4 STILLWELL AVENUE, 11224      40.6     -74.0
## 5 ASTORIA BOULEVARD, 11369     40.8     -73.9
## 6 11 AVENUE, 11219             40.6     -74.0
# filter out longtitude and latitude outside of NYC
Violations_loaction_clean <- Violations_loaction_clean %>% 
  filter(longitude > -74 & longitude < -71) 
dim(Violations_loaction_clean)
## [1] 4109    3
#Violations_loaction_clean
set.seed(12345)
Violations_loaction_clean_sf <- st_as_sf(x = Violations_loaction_clean,
         coords = c("longitude","latitude"),
         crs = 4326) #%>% st_transform(2263)

# check crs
st_crs(Violations_loaction_clean_sf)
## Coordinate Reference System:
##   User input: EPSG:4326 
##   wkt:
## GEOGCRS["WGS 84",
##     ENSEMBLE["World Geodetic System 1984 ensemble",
##         MEMBER["World Geodetic System 1984 (Transit)"],
##         MEMBER["World Geodetic System 1984 (G730)"],
##         MEMBER["World Geodetic System 1984 (G873)"],
##         MEMBER["World Geodetic System 1984 (G1150)"],
##         MEMBER["World Geodetic System 1984 (G1674)"],
##         MEMBER["World Geodetic System 1984 (G1762)"],
##         MEMBER["World Geodetic System 1984 (G2139)"],
##         ELLIPSOID["WGS 84",6378137,298.257223563,
##             LENGTHUNIT["metre",1]],
##         ENSEMBLEACCURACY[2.0]],
##     PRIMEM["Greenwich",0,
##         ANGLEUNIT["degree",0.0174532925199433]],
##     CS[ellipsoidal,2],
##         AXIS["geodetic latitude (Lat)",north,
##             ORDER[1],
##             ANGLEUNIT["degree",0.0174532925199433]],
##         AXIS["geodetic longitude (Lon)",east,
##             ORDER[2],
##             ANGLEUNIT["degree",0.0174532925199433]],
##     USAGE[
##         SCOPE["Horizontal component of 3D system."],
##         AREA["World."],
##         BBOX[-90,-180,90,180]],
##     ID["EPSG",4326]]
# preview
head(Violations_loaction_clean_sf)
## Simple feature collection with 6 features and 1 field
## Geometry type: POINT
## Dimension:     XY
## Bounding box:  xmin: -73.98414 ymin: 40.5906 xmax: -73.85992 ymax: 40.84652
## Geodetic CRS:  WGS 84
## # A tibble: 6 × 2
##   loc                                  geometry
##   <chr>                             <POINT [°]>
## 1 MORRIS PARK AVE, 10462   (-73.85992 40.84652)
## 2 FLATBUSH AVENUE, 11225   (-73.96283 40.66365)
## 3 WEST   57 STREET, 10019  (-73.98414 40.76727)
## 4 STILLWELL AVENUE, 11224   (-73.98409 40.5906)
## 5 ASTORIA BOULEVARD, 11369 (-73.87271 40.76249)
## 6 NOSTRAND AVENUE, 11226   (-73.94857 40.64142)
# install.packages("remotes")
remotes::install_github("mfherman/nycgeo")
## Skipping install of 'nycgeo' from a github remote, the SHA1 (4fee55c1) has not changed since last install.
##   Use `force = TRUE` to force installation
library(nycgeo)
# create geography data frame
all_nyc_boroughs <- nyc_boundaries(geography = "borough") %>% 
  st_transform(4326)
## old-style crs object detected; please recreate object with a recent sf::st_crs()
boundary_plot <- ggplot(all_nyc_boroughs) + 
  geom_sf() + 
  scale_x_continuous(breaks = c(-72.677, -72.683))
boundary_plot

# randomly Sample 10 rows of data with dplyr
data_s1 <- sample_n(Violations_loaction_clean_sf, 10)   
data_s1 
## Simple feature collection with 10 features and 1 field
## Geometry type: POINT
## Dimension:     XY
## Bounding box:  xmin: -73.98785 ymin: 40.5847 xmax: -73.77814 ymax: 40.8324
## Geodetic CRS:  WGS 84
## # A tibble: 10 × 2
##    loc                                      geometry
##  * <chr>                                 <POINT [°]>
##  1 LEXINGTON AVENUE, 10065      (-73.96544 40.76598)
##  2 YELLOWSTONE BOULEVARD, 11375 (-73.85206 40.72255)
##  3 ST NICHOLAS AVENUE, 10032     (-73.94088 40.8324)
##  4 IRVING AVE, 11237            (-73.91407 40.69884)
##  5 NOSTRAND AVENUE, 11225       (-73.94857 40.64142)
##  6 JFK INT AIRPORT, 11430       (-73.77814 40.64131)
##  7 SHORE PARKWAY, 11235          (-73.94803 40.5847)
##  8 18 AVENUE, 11204             (-73.98785 40.62143)
##  9 35 AVENUE, 11101             (-73.93011 40.75888)
## 10 70 STREET, 11377              (-73.89407 40.7336)
boundary_plot +
  geom_sf(data = data_s1, color = "blue", size = 1.5)

# randomly Sample 50 rows of data with dplyr
data_s2 <- sample_n(Violations_loaction_clean_sf, 50)   
boundary_plot +
  geom_sf(data = data_s2, color = "blue", size = 1.5)

nyc_cd_plot <- ggplot() +
  geom_sf(
    data = nyc_cd,
    color = "white",
    lwd = 0.2,
    aes(fill = borough_name),
    alpha = 0.8) +
  scale_fill_manual(values = c("lightblue", "deepskyblue4",
                               "blue",
                               "purple", "darkblue"),
                               guide = guide_legend(override.aes = list(
                                 linetype = "blank",
                                 shape = NA
                               ))) +
  labs(fill = "Borough", 
       title = "NYC Community Districts by Borough") +
  theme_void() +
  theme(plot.title.position = 'plot', 
        plot.title = element_text(hjust = 0.5)) +
  theme(panel.grid = element_line(color = "transparent"))

nyc_cd_plot
# save plot
ggsave("nyc_cd_plot.png", plot = nyc_cd_plot)
# create plot
nyc_cd_restaurants_plot <- nyc_cd_plot +  
  geom_sf(
    data = data_s1_sf,
    aes(color = "GROW NYC Markets"),
    alpha = 0.8,
    show.legend = "point",
    pch = 21,
    color = "white",
    fill = "black") 
nyc_cd_restaurants_plot

c. Using the spatial coordinates you obtained in the previous exercises, create an informative interactive map using leaflet that illustrates the nature and extent of restaurant violations in New York City.

I couldn’t knit the leaflet map to pdf, so I don’t evaluation this code chunck.

# plot the random sample of 10 restaurant violations in New York City
leaflet() %>% 
  addTiles() %>%
  addMarkers(data = data_s1) 

I couldn’t knit the leaflet map to pdf, so I don’t evaluation this code chunck.

# plot the random sample of 50 restaurant violations in New York City
leaflet() %>% 
  addTiles() %>%
  addMarkers(data = data_s2)

Problem 4 (Medium):

a. Use the spatial data in the macleish package and ggspatial to make an informative static map of the MacLeish Field Station property.

library(macleish)
## Loading required package: etl
boundary <- macleish_layers %>%
  pluck("boundary")

buildings <- macleish_layers %>%
  pluck("buildings")

landmarks <- macleish_layers %>%
  pluck("landmarks")

trails <- macleish_layers %>%
  pluck("trails")

camp_sites <- macleish_layers %>%
  pluck("camp_sites")

research <-  macleish_layers %>%
  pluck("research")

challenge_courses <- macleish_layers %>%
  pluck("challenge_courses")

forests <-  macleish_layers %>%
  pluck("forests")

streams <- macleish_layers %>%
  pluck("streams")

wetlands <- macleish_layers %>%
  pluck("wetlands")

boundary_plot <- ggplot(boundary) + 
  geom_sf() + 
  scale_x_continuous(breaks = c(-72.677, -72.683))

boundary_plot +
  geom_sf(data = forests, fill = "green", alpha = 0.1) +
  geom_sf(data = streams, color = "light blue", size = 0.5) +
  geom_sf(data = buildings, color = "dark grey", size = 1) +
  geom_sf(data = landmarks, color = "grey", size = 1) +
  geom_sf(data = trails, color = "dark green", size = 1) +
  geom_sf(data = camp_sites, size = 4) + geom_sf_label(
    data = camp_sites, aes(label = name), 
    nudge_y = 0.001
  ) +
  geom_sf(data = research, color = "pink", size = 0.5) +
  geom_sf(data = challenge_courses, color = "black", size = 0.5) 
## Warning in st_point_on_surface.sfc(sf::st_zm(x)): st_point_on_surface may not
## give correct results for longitude/latitude data

b. Use the spatial data in the macleish package and leaflet to make an informative interactive map of the MacLeish Field Station property.

I couldn’t knit the leaflet map to pdf, so I don’t evaluation this code chunck.

forest_pal <- colorFactor("Greens", forests$type)

map <- leaflet() %>%
  # Base groups
  addTiles(group = "OpenStreetMap") %>%
  addProviderTiles("Esri.WorldTopoMap", group = "Topography") %>%
  addProviderTiles("Esri.WorldImagery", group = "Satellite") %>%
  addProviderTiles("Stamen.TonerLite", group = "Toner Lite") %>%
  # Boundaries
  addPolygons(data = boundary, weight = 1, fillOpacity = 0.01, group = "Boundaries") %>%
  # Man-Made structures
  addPolygons(
    data = buildings,
    weight = 1, popup = ~name, group = "Structures"
  ) %>%
  addMarkers(
    data = landmarks,
    popup = ~Label, group = "Structures"
  ) %>%
  addPolylines(
    data = trails,
    weight = 1, color = "brown",
    popup = ~name, group = "Structures"
  ) %>%
  # Natural elements
  addPolygons(
    data = forests,
    color = ~ forest_pal(type), weight = 0.1,
    fillOpacity = 0.2,
    popup = ~type, group = "Natural"
  ) %>%
  addPolygons(
    data = wetlands,
    weight = 1, group = "Natural"
  ) %>%
  addPolylines(
    data = streams,
    weight = 2, group = "Natural"
  ) %>%
  # Layers control
  addLayersControl(
    baseGroups = c("OpenStreetMap", "Topography", "Satellite", "Toner Lite"),
    overlayGroups = c("Boundaries", "Structures", "Natural"),
    options = layersControlOptions(collapsed = FALSE)
  ) 
map