task 1:Plot at least two high-quality static maps with one using the
COVID-19 data and one using a related factor. You can use either plot
method for sf or ggplot method.
########################## MAP 1: COVID-19 data #######################################
# Read COVID-19 data for NYC
nyc_covid_data <- readr::read_csv("./R-Spatial_II_Lab/tests-by-zcta_2021_04_23.csv", lazy = FALSE,show_col_types = FALSE)
str(nyc_covid_data)
## spc_tbl_ [177 × 13] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ MODIFIED_ZCTA : num [1:177] 10001 10002 10003 10004 10005 ...
## $ NEIGHBORHOOD_NAME: chr [1:177] "Chelsea/NoMad/West Chelsea" "Chinatown/Lower East Side" "East Village/Gramercy/Greenwich Village" "Financial District" ...
## $ BOROUGH_GROUP : chr [1:177] "Manhattan" "Manhattan" "Manhattan" "Manhattan" ...
## $ label : chr [1:177] "10001, 10118" "10002" "10003" "10004" ...
## $ lat : num [1:177] 40.8 40.7 40.7 40.7 40.7 ...
## $ lon : num [1:177] -74 -74 -74 -74 -74 ...
## $ COVID_CASE_COUNT : num [1:177] 1542 5902 2803 247 413 ...
## $ COVID_CASE_RATE : num [1:177] 5584 7836 5193 8311 4716 ...
## $ POP_DENOMINATOR : num [1:177] 27613 75323 53978 2972 8757 ...
## $ COVID_DEATH_COUNT: num [1:177] 35 264 48 2 0 1 4 118 37 62 ...
## $ COVID_DEATH_RATE : num [1:177] 126.8 350.5 88.9 67.3 0 ...
## $ PERCENT_POSITIVE : num [1:177] 7.86 12.63 6.93 6.92 6.72 ...
## $ TOTAL_COVID_TESTS: num [1:177] 20158 48197 41076 3599 6102 ...
## - attr(*, "spec")=
## .. cols(
## .. MODIFIED_ZCTA = col_double(),
## .. NEIGHBORHOOD_NAME = col_character(),
## .. BOROUGH_GROUP = col_character(),
## .. label = col_character(),
## .. lat = col_double(),
## .. lon = col_double(),
## .. COVID_CASE_COUNT = col_double(),
## .. COVID_CASE_RATE = col_double(),
## .. POP_DENOMINATOR = col_double(),
## .. COVID_DEATH_COUNT = col_double(),
## .. COVID_DEATH_RATE = col_double(),
## .. PERCENT_POSITIVE = col_double(),
## .. TOTAL_COVID_TESTS = col_double()
## .. )
## - attr(*, "problems")=<externalptr>
# Read shapefile for NYC zip
nyc_shapefile <-st_read("./ZIP_CODE_040114/ZIP_CODE_040114.shp")
## Reading layer `ZIP_CODE_040114' from data source
## `C:\Users\melin\Documents\GTECH78520\part3\ZIP_CODE_040114\ZIP_CODE_040114.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 263 features and 12 fields
## Geometry type: POLYGON
## Dimension: XY
## Bounding box: xmin: 913129 ymin: 120020.9 xmax: 1067494 ymax: 272710.9
## Projected CRS: NAD83 / New York Long Island (ftUS)
# Merge COVID-19 data with the shapefile
nyc_COVID_sf <- merge(nyc_shapefile, nyc_covid_data, by.x = "ZIPCODE", by.y = "MODIFIED_ZCTA")
#qick plot to look at the data
plot(nyc_COVID_sf,max.plot = 24 )

## ggplot method
# look at how the values are distributed
hist(nyc_COVID_sf$PERCENT_POSITIVE,
breaks = 10,
col = "blue",
main = "Histogram",
xlab = "PERCENT_POSITIVE",
ylab = "Frequency"
)

# First create the quantile breaks
breaks_qt <- classIntervals(c(min(nyc_COVID_sf$PERCENT_POSITIVE) - .00001,nyc_COVID_sf$PERCENT_POSITIVE), n = 5, style = "quantile")
# Use cut to divide percent postive into intervals
nyc_COVID_sf <- mutate(nyc_COVID_sf,
nyc_COVID_cut = cut(PERCENT_POSITIVE, breaks_qt$brks, dig.lab = 4, digits=1))
# Plot results
ggplot(nyc_COVID_sf) +
geom_sf(aes(fill = nyc_COVID_cut)) +
scale_fill_brewer(palette = "OrRd", name = "Percent Positive") +
labs(x = 'Longitude', y = 'Latitude', title = 'COVID-19 Positive Cases in NYC', caption = 'Data Source: [NYC Department of Health and Mental Hygiene (DOHMH) ]') +
theme_minimal() +
theme(legend.position = "top", plot.title = element_text(size = 16, face = "bold"), axis.title = element_text(size = 14))

########################## MAP 1: Elderly Population #######################################
# Load the NYC census tracts from Department of Planning
nycCensus <- sf::st_read('./R-Spatial_II_Lab/geo_export_1dc7b645-647b-4806-b9a0-7b79660f120a.shp', stringsAsFactors = FALSE)
## Reading layer `geo_export_1dc7b645-647b-4806-b9a0-7b79660f120a' from data source `C:\Users\melin\Documents\GTECH78520\part3\R-Spatial_II_Lab\geo_export_1dc7b645-647b-4806-b9a0-7b79660f120a.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 2165 features and 11 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: -74.25559 ymin: 40.49612 xmax: -73.70001 ymax: 40.91553
## Geodetic CRS: WGS84(DD)
str(nycCensus)
## Classes 'sf' and 'data.frame': 2165 obs. of 12 variables:
## $ boro_code : chr "5" "1" "1" "1" ...
## $ boro_ct201: chr "5000900" "1009800" "1010000" "1010200" ...
## $ boro_name : chr "Staten Island" "Manhattan" "Manhattan" "Manhattan" ...
## $ cdeligibil: chr "E" "I" "I" "I" ...
## $ ct2010 : chr "000900" "009800" "010000" "010200" ...
## $ ctlabel : chr "9" "98" "100" "102" ...
## $ ntacode : chr "SI22" "MN19" "MN19" "MN17" ...
## $ ntaname : chr "West New Brighton-New Brighton-St. George" "Turtle Bay-East Midtown" "Turtle Bay-East Midtown" "Midtown-Midtown South" ...
## $ puma : chr "3903" "3808" "3808" "3807" ...
## $ shape_area: num 2497010 1906016 1860938 1860993 1864600 ...
## $ shape_leng: num 7729 5534 5692 5688 5693 ...
## $ geometry :sfc_MULTIPOLYGON of length 2165; first list element: List of 1
## ..$ :List of 1
## .. ..$ : num [1:28, 1:2] -74.1 -74.1 -74.1 -74.1 -74.1 ...
## ..- attr(*, "class")= chr [1:3] "XY" "MULTIPOLYGON" "sfg"
## - attr(*, "sf_column")= chr "geometry"
## - attr(*, "agr")= Factor w/ 3 levels "constant","aggregate",..: NA NA NA NA NA NA NA NA NA NA ...
## ..- attr(*, "names")= chr [1:11] "boro_code" "boro_ct201" "boro_name" "cdeligibil" ...
# does not use the standard County FIPS, but boro code
nycCensus %<>% dplyr::mutate(cntyFIPS = case_when(
boro_name == 'Bronx' ~ '005',
boro_name == 'Brooklyn' ~ '047',
boro_name == 'Manhattan' ~ '061',
boro_name == 'Queens' ~ '081',
boro_name == 'Staten Island' ~ '085'),
tractFIPS = paste(cntyFIPS, ct2010, sep='')
)
# Load the ACS data from CENSUS website
acsData <- readLines("./R-Spatial_II_Lab/ACSDP5Y2018.DP05_data_with_overlays_2020-04-22T132935.csv") %>%
magrittr::extract(-2) %>%
textConnection() %>%
read.csv(header=TRUE, quote= "\"") %>%
dplyr::select(GEO_ID,
totPop = DP05_0001E, elderlyPop = DP05_0024E) %>%
dplyr::mutate(censusCode = stringr::str_sub(GEO_ID, -9,-1));
# now extract columns 1 through 10 from the acsData data frame
acsData %>%
magrittr::extract(1:10,)
## GEO_ID totPop elderlyPop censusCode
## 1 1400000US36005000100 7080 51 005000100
## 2 1400000US36005000200 4542 950 005000200
## 3 1400000US36005000400 5634 710 005000400
## 4 1400000US36005001600 5917 989 005001600
## 5 1400000US36005001900 2765 76 005001900
## 6 1400000US36005002000 9409 977 005002000
## 7 1400000US36005002300 4600 648 005002300
## 8 1400000US36005002400 172 0 005002400
## 9 1400000US36005002500 5887 548 005002500
## 10 1400000US36005002701 2868 243 005002701
# Join ACS data to the census tracts
popData <- merge(nycCensus, acsData, by.x ='tractFIPS', by.y = 'censusCode')
# Transform popData data to match the CRS of zipcode data
popNYC <- sf::st_transform(popData, st_crs(nyc_shapefile))
# Now aggregate to the zip code level
PopZipNYC <- sf::st_join(nyc_shapefile,
popNYC %>% sf::st_centroid(),
join = st_contains) %>%
group_by(ZIPCODE, PO_NAME, POPULATION, COUNTY) %>%
summarise(totPop = sum(totPop),
elderlyPop = sum(elderlyPop)
)
## Warning: st_centroid assumes attributes are constant over geometries
## `summarise()` has grouped output by 'ZIPCODE', 'PO_NAME', 'POPULATION'. You can
## override using the `.groups` argument.
PopZipNYC %>% head()
## Simple feature collection with 6 features and 6 fields
## Geometry type: GEOMETRY
## Dimension: XY
## Bounding box: xmin: 971132.6 ymin: 188447.3 xmax: 998309.7 ymax: 230942.5
## Projected CRS: NAD83 / New York Long Island (ftUS)
## # A tibble: 6 × 7
## # Groups: ZIPCODE, PO_NAME, POPULATION [6]
## ZIPCODE PO_NAME POPULAT…¹ COUNTY totPop elder…² geometry
## <chr> <chr> <dbl> <chr> <int> <int> <GEOMETRY [US_survey_foo>
## 1 00083 Central Park 25 New Y… 3 0 POLYGON ((998309.7 22961…
## 2 10001 New York 22413 New Y… 19146 2500 POLYGON ((981958.6 21346…
## 3 10002 New York 81305 New Y… 74310 15815 POLYGON ((991339.9 20757…
## 4 10003 New York 55878 New Y… 53487 6296 POLYGON ((989830.5 20704…
## 5 10004 New York 2187 New Y… NA NA MULTIPOLYGON (((977503.7…
## 6 10005 New York 8107 New Y… 8809 209 POLYGON ((982595.7 19588…
## # … with abbreviated variable names ¹​POPULATION, ²​elderlyPop
# See where we have those NAs
PopZipNYC %>% dplyr::filter(is.na(totPop))
## Simple feature collection with 68 features and 6 fields
## Geometry type: GEOMETRY
## Dimension: XY
## Bounding box: xmin: 971132.6 ymin: 151085.5 xmax: 1067494 ymax: 263362
## Projected CRS: NAD83 / New York Long Island (ftUS)
## # A tibble: 68 × 7
## # Groups: ZIPCODE, PO_NAME, POPULATION [67]
## ZIPCODE PO_NAME POPULATION COUNTY totPop elder…¹ geometry
## * <chr> <chr> <dbl> <chr> <int> <int> <GEOMETRY [US_survey_foo>
## 1 10004 New York 2187 New York NA NA MULTIPOLYGON (((977503.7…
## 2 10020 New York 0 New York NA NA POLYGON ((990566.4 21563…
## 3 10041 New York 0 New York NA NA POLYGON ((981406.8 19545…
## 4 10043 New York 0 New York NA NA POLYGON ((982286 195715.…
## 5 10045 New York 0 New York NA NA POLYGON ((981726 197499.…
## 6 10047 New York 0 New York NA NA POLYGON ((988037.5 20872…
## 7 10048 New York 0 New York NA NA POLYGON ((981026.8 19786…
## 8 10055 New York 12 New York NA NA POLYGON ((991432.1 21589…
## 9 10075 New York 25203 New York NA NA POLYGON ((998799 219864.…
## 10 10080 New York 0 New York NA NA POLYGON ((980680.1 19813…
## # … with 58 more rows, and abbreviated variable name ¹​elderlyPop
## ggplot method
# Plot results
ggplot() +
geom_sf(data = PopZipNYC, aes(fill = elderlyPop)) +
scale_fill_gradient(name = 'Elderly Population', low = 'white', high = 'red') +
labs(x = 'Longitude', y = 'Latitude', title = 'Elderly Population in NYC', caption = 'Data Source: [CENSUS website]') +
theme_minimal() +
theme( plot.title = element_text(size = 16, face = "bold"), axis.title = element_text(size = 14))

task 3: Create a web-based interactive map for COIVD-19 data using
tmap, mapview, or leaflet package and save it as a HTML file.
#mapview
# Convert the joined dataset to an sf object
joined_sf <- st_as_sf(joined_data, coords = c("lon", "lat"), crs = 4326)
# Create the interactive map using mapview
covid_map <- mapview(joined_sf,
zcol = c("PERCENT_POSITIVE", "elderlyPop"),
legend = TRUE,
map.types = c("OpenStreetMap", "Esri.WorldImagery"),
col.regions = list(colorRampPalette(c("lightblue", "darkblue")),
colorRampPalette(c("lightgreen", "darkgreen"))),
stroke.width = 0.5)
# Display the interactive map
covid_map
# Save the interactive map as an HTML file
mapview::mapshot(covid_map, 'covid_map_mapview.html')
#leaflet
# reproject, if needed
joined_data <- st_transform(joined_data, 4326)
pal_fun <- colorQuantile("YlOrRd", NULL, n = 5)
p_tip <- paste0("GEOID: ", joined_data$GEOID);
p_popup <- paste0("<strong>Noise Complaint Density: </strong>",
joined_data$elderlyPop%>%round(3)%>%format(nsmall = 3),
" /sqkm",
" <br/>",
sep="")
leaflet(joined_data) %>%
addPolygons(
stroke = FALSE,
fillColor = ~pal_fun(elderlyPop),
fillOpacity = 0.8, smoothFactor = 0.5,
label = p_tip,
popup = p_popup) %>%
addTiles()%>%
addLegend("bottomright",
pal=pal_fun,
values=~elderlyPop,
title = 'elderly Population <br> per sqkm')