Load Packages
require(rmarkdown);
## Loading required package: rmarkdown
require(knitr);
## Loading required package: knitr
require(tidyverse);
## Loading required package: tidyverse
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.0.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
require(sf);
## Loading required package: sf
## Linking to GEOS 3.13.0, GDAL 3.8.5, PROJ 9.5.1; sf_use_s2() is TRUE
require(mapview);
## Loading required package: mapview
require(magrittr);
## Loading required package: magrittr
##
## Attaching package: 'magrittr'
##
## The following object is masked from 'package:purrr':
##
## set_names
##
## The following object is masked from 'package:tidyr':
##
## extract
require(janitor);
## Loading required package: janitor
##
## Attaching package: 'janitor'
##
## The following objects are masked from 'package:stats':
##
## chisq.test, fisher.test
require(stringr);
require(ggplot2);
require(ggmap);
## Loading required package: ggmap
## ℹ Google's Terms of Service: <https://mapsplatform.google.com>
## Stadia Maps' Terms of Service: <https://stadiamaps.com/terms-of-service/>
## OpenStreetMap's Tile Usage Policy: <https://operations.osmfoundation.org/policies/tiles/>
## ℹ Please cite ggmap if you use it! Use `citation("ggmap")` for details.
## Attaching package: 'ggmap'
##
## The following object is masked from 'package:magrittr':
##
## inset
require(leaflet);
## Loading required package: leaflet
require(gganimate);
## Loading required package: gganimate
## No renderer backend detected. gganimate will default to writing frames to separate files
## Consider installing:
## - the `gifski` package for gif output
## - the `av` package for video output
## and restarting the R session
require(tmap)
## Loading required package: tmap
require(scales)
## Loading required package: scales
##
## Attaching package: 'scales'
##
## The following object is masked from 'package:purrr':
##
## discard
##
## The following object is masked from 'package:readr':
##
## col_factor
require(ggpubr)
## Loading required package: ggpubr
nyc_zipdata <- st_read('/Users/alex/Desktop/Data Viz with R/Second Section/zip_data4.geojson')
## Reading layer `zip_data4' from data source
## `/Users/alex/Desktop/Data Viz with R/Second Section/zip_data4.geojson'
## using driver `GeoJSON'
## Simple feature collection with 248 features and 12 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: 913129 ymin: 120020.9 xmax: 1067494 ymax: 272710.9
## Projected CRS: NAD83 / New York Long Island (ftUS)
acs_popbyzip <- st_read("~/Desktop/Data Viz with R/Second Section/acsPopByZip.gpkg")
## Reading layer `NYC_COVID_Pop_by_Zip' from data source
## `/Users/alex/Desktop/Data Viz with R/Second Section/acsPopByZip.gpkg'
## using driver `GPKG'
## Simple feature collection with 180 features and 13 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: 913129 ymin: 120020.9 xmax: 1067113 ymax: 272710.9
## Projected CRS: NAD83 / New York Long Island (ftUS)
glimpse(nyc_zipdata)
## Rows: 248
## Columns: 13
## $ Zipcode <chr> "00083", "10001", "10002", "10003", "10004", "10005"…
## $ PO_NAME <chr> "Central Park", "New York", "New York", "New York", …
## $ COUNTY <chr> "New York", "New York", "New York", "New York", "New…
## $ NEIGHBORHOOD_NAME <chr> NA, "Chelsea/NoMad/West Chelsea", "Chinatown/Lower E…
## $ POPULATION <dbl> 25, 22413, 81305, 55878, 2187, 8107, 3011, 7323, 614…
## $ COVID_CASE_COUNT <dbl> NA, 1542, 5902, 2803, 247, 413, 164, 379, 3605, 1686…
## $ TOTAL_COVID_TESTS <dbl> NA, 20158, 48197, 41076, 3599, 6102, 2441, 6342, 387…
## $ PERCENT_POSITIVE <dbl> NA, 7.86, 12.63, 6.93, 6.92, 6.72, 6.84, 6.07, 9.49,…
## $ COVID_CASE_RATE <dbl> NA, 5584.31, 7835.62, 5192.87, 8310.58, 4716.11, 484…
## $ COVID_DEATH_RATE <dbl> NA, 126.75, 350.49, 88.93, 67.29, 0.00, 29.57, 57.21…
## $ TotalStores <int> NA, 60, 192, 102, 10, 11, 4, 26, 88, 56, 102, 45, 11…
## $ TotalHealthFac <int> 0, 14, 40, 40, 2, 2, 2, 2, 40, 14, 14, 40, 40, 26, 1…
## $ geometry <MULTIPOLYGON [US_survey_foot]> MULTIPOLYGON (((998309.7 2…
glimpse(acs_popbyzip)
## Rows: 180
## Columns: 14
## $ ZIPCODE <chr> "10001", "10002", "10003", "10004", "10005", "10006", "10007",…
## $ PO_NAME <chr> "New York", "New York", "New York", "New York", "New York", "N…
## $ POPULAT <dbl> 22413, 81305, 55878, 2187, 8107, 3011, 7323, 61455, 29881, 505…
## $ COUNTY <chr> "New York", "New York", "New York", "New York", "New York", "N…
## $ Positiv <int> 260, 712, 347, 24, 44, 14, 40, 518, 201, 394, 116, 179, 233, 5…
## $ Total <int> 571, 1358, 830, 64, 137, 54, 130, 1180, 561, 852, 330, 426, 52…
## $ totPop <dbl> 25645, 76815, 54181, NA, 8809, 2354, 9145, 56178, 33297, 51288…
## $ malPctg <dbl> 51.89706, 48.23277, 50.34053, NA, 42.81984, 39.59218, 51.89721…
## $ asianPp <dbl> 6788, 32502, 8027, NA, 1974, 640, 1729, 8150, 6188, 5666, 4680…
## $ blackPp <dbl> 1897, 7882, 3443, NA, 291, 48, 354, 6302, 1613, 3632, 973, 124…
## $ hspncPp <dbl> 3679, 21621, 4375, NA, 504, 245, 532, 13116, 3712, 5834, 1441,…
## $ whitePp <dbl> 16725, 27156, 42259, NA, 6603, 1693, 7332, 37268, 25437, 42528…
## $ eldrlyP <dbl> 3239, 15770, 6391, NA, 206, 66, 601, 8602, 4716, 8176, 3269, 4…
## $ geom <MULTIPOLYGON [US_survey_foot]> MULTIPOLYGON (((981958.6 21..., MULT…
#test map
ggplot(nyc_zipdata) +
geom_sf(aes(fill=COVID_CASE_COUNT))

#Trying quantile breaks
range(nyc_zipdata$COVID_CASE_RATE)
## [1] NA NA
require(classInt)
## Loading required package: classInt
breaks_qt <- classIntervals(c(min(nyc_zipdata$COVID_CASE_RATE) - .00001,
nyc_zipdata$COVID_CASE_RATE), n = 7, style = "quantile")
## Warning in classIntervals(c(min(nyc_zipdata$COVID_CASE_RATE) - 1e-05,
## nyc_zipdata$COVID_CASE_RATE), : var has missing values, omitted in finding
## classes
breaks_qt
## style: quantile
## [3413.15,5516.366) [5516.366,7187.643) [7187.643,8171.006) [8171.006,9071.344)
## 26 25 25 25
## [9071.344,9996.29) [9996.29,11124.31) [11124.31,16211.67]
## 25 25 26
# Static Map 1, COVID Data
nyc_zipdata_a <- mutate(nyc_zipdata, COVID_CASE_RATE = cut(COVID_CASE_RATE, breaks_qt$brks, dig.lab=6, digits = 1))
ggplot(nyc_zipdata_a) +
geom_sf(aes(fill=COVID_CASE_RATE), color=NA) +
scale_fill_brewer(palette = "RdPu", name = 'Case Rate') +
labs(y = "Latitude", x = "Longitude", title="COVID Case Rates by NYC Zipcode, Week of 4/23/21") +
coord_sf(crs = st_crs(2263)) +
theme_minimal()

# Static Map 2, Health Facilities
sapply(nyc_zipdata, class)
## $Zipcode
## [1] "character"
##
## $PO_NAME
## [1] "character"
##
## $COUNTY
## [1] "character"
##
## $NEIGHBORHOOD_NAME
## [1] "character"
##
## $POPULATION
## [1] "numeric"
##
## $COVID_CASE_COUNT
## [1] "numeric"
##
## $TOTAL_COVID_TESTS
## [1] "numeric"
##
## $PERCENT_POSITIVE
## [1] "numeric"
##
## $COVID_CASE_RATE
## [1] "numeric"
##
## $COVID_DEATH_RATE
## [1] "numeric"
##
## $TotalStores
## [1] "integer"
##
## $TotalHealthFac
## [1] "integer"
##
## $geometry
## [1] "sfc_MULTIPOLYGON" "sfc"
ggplot(nyc_zipdata) +
geom_sf(aes(fill=TotalHealthFac), color=NA) +
scale_fill_gradient(low = "lightblue", high = "darkblue", name = 'Number of Health Facilities') +
labs(y = "Latitude", x = "Longitude", title="NYC Health Facilities by Zipcode") +
coord_sf(default_crs = sf::st_crs(2263)) +
theme_minimal()

#Checking that I did it right in terms of health facilities map
nys_health_2263 <- st_read("~/Desktop/Data Viz with R/Second Section/nys_health_2263.geojson")
## Reading layer `nys_health_2263' from data source
## `/Users/alex/Desktop/Data Viz with R/Second Section/nys_health_2263.geojson'
## using driver `GeoJSON'
## Simple feature collection with 7686 features and 34 fields
## Geometry type: POINT
## Dimension: XY
## Bounding box: xmin: -544951.7 ymin: 127612.4 xmax: 1489595 ymax: 1755603
## Projected CRS: NAD83 / New York Long Island (ftUS)
tot_stores <- read_csv("~/Desktop/Data Viz with R/Second Section/tot_stores.csv")
## Rows: 192 Columns: 2
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (2): Zip.Code, TotalStores
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
tot_healthfac <- read_csv("~/Desktop/Data Viz with R/Second Section/tot_healthfac.geojson")
## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
## dat <- vroom(...)
## problems(dat)
## Rows: 357 Columns: 1
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): {
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
NYC_Stores <- read_csv("~/Desktop/Data Viz with R/Second Section/NYC_Stores.geojson")
## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
## dat <- vroom(...)
## problems(dat)
## Rows: 14238 Columns: 1
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): {
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
nys_health_2263 <- nys_health_2263
filtered_healthfacs_points <- st_intersection(nys_health_2263, nyc_zipdata)
## Warning: attribute variables are assumed to be spatially constant throughout
## all geometries
ggplot(nyc_zipdata) +
geom_sf(aes(fill=TotalHealthFac)) +
geom_sf(data=filtered_healthfacs_points, color="gold", fill=NA, size = 0.001) +
scale_fill_gradient(low = "lightblue", high = "darkblue", name = 'Number of Health Facilities') +
labs(y = "Latitude", x = "Longitude", title="NYC Health Facilities by Zipcode") +
coord_sf(default_crs = sf::st_crs(2263)) +
theme_minimal()

#Comparing with an interactive map to triple check
mapview(nyc_zipdata, zcol = "TotalHealthFac", color = "lightblue", legend = TRUE) + mapview(filtered_healthfacs_points, color = "chartreuse", fill=NA,
cex = 3, legend = FALSE)