GTECH_78520_2023s_week11_Melissa.linares03

#3.7 Lab Assignment #The third and last R-spatial lab is to visualize data that we assembled during the first two labs. As geovisualization is often exploratory, you are encouraged to be more creative.

Load packages

task 1:Plot at least two high-quality static maps with one using the COVID-19 data and one using a related factor. You can use either plot method for sf or ggplot method.

########################## MAP 1: COVID-19 data #######################################

# Read COVID-19 data for NYC
nyc_covid_data <- readr::read_csv("./R-Spatial_II_Lab/tests-by-zcta_2021_04_23.csv", lazy = FALSE,show_col_types = FALSE)
str(nyc_covid_data)

## spc_tbl_ [177 × 13] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ MODIFIED_ZCTA    : num [1:177] 10001 10002 10003 10004 10005 ...
##  $ NEIGHBORHOOD_NAME: chr [1:177] "Chelsea/NoMad/West Chelsea" "Chinatown/Lower East Side" "East Village/Gramercy/Greenwich Village" "Financial District" ...
##  $ BOROUGH_GROUP    : chr [1:177] "Manhattan" "Manhattan" "Manhattan" "Manhattan" ...
##  $ label            : chr [1:177] "10001, 10118" "10002" "10003" "10004" ...
##  $ lat              : num [1:177] 40.8 40.7 40.7 40.7 40.7 ...
##  $ lon              : num [1:177] -74 -74 -74 -74 -74 ...
##  $ COVID_CASE_COUNT : num [1:177] 1542 5902 2803 247 413 ...
##  $ COVID_CASE_RATE  : num [1:177] 5584 7836 5193 8311 4716 ...
##  $ POP_DENOMINATOR  : num [1:177] 27613 75323 53978 2972 8757 ...
##  $ COVID_DEATH_COUNT: num [1:177] 35 264 48 2 0 1 4 118 37 62 ...
##  $ COVID_DEATH_RATE : num [1:177] 126.8 350.5 88.9 67.3 0 ...
##  $ PERCENT_POSITIVE : num [1:177] 7.86 12.63 6.93 6.92 6.72 ...
##  $ TOTAL_COVID_TESTS: num [1:177] 20158 48197 41076 3599 6102 ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   MODIFIED_ZCTA = col_double(),
##   ..   NEIGHBORHOOD_NAME = col_character(),
##   ..   BOROUGH_GROUP = col_character(),
##   ..   label = col_character(),
##   ..   lat = col_double(),
##   ..   lon = col_double(),
##   ..   COVID_CASE_COUNT = col_double(),
##   ..   COVID_CASE_RATE = col_double(),
##   ..   POP_DENOMINATOR = col_double(),
##   ..   COVID_DEATH_COUNT = col_double(),
##   ..   COVID_DEATH_RATE = col_double(),
##   ..   PERCENT_POSITIVE = col_double(),
##   ..   TOTAL_COVID_TESTS = col_double()
##   .. )
##  - attr(*, "problems")=<externalptr>

# Read shapefile for NYC zip
nyc_shapefile <-st_read("./ZIP_CODE_040114/ZIP_CODE_040114.shp")

## Reading layer `ZIP_CODE_040114' from data source 
##   `C:\Users\melin\Documents\GTECH78520\part3\ZIP_CODE_040114\ZIP_CODE_040114.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 263 features and 12 fields
## Geometry type: POLYGON
## Dimension:     XY
## Bounding box:  xmin: 913129 ymin: 120020.9 xmax: 1067494 ymax: 272710.9
## Projected CRS: NAD83 / New York Long Island (ftUS)

# Merge COVID-19 data with the shapefile
nyc_COVID_sf <- merge(nyc_shapefile, nyc_covid_data,  by.x = "ZIPCODE", by.y = "MODIFIED_ZCTA")

#qick plot to look at the data
plot(nyc_COVID_sf,max.plot = 24 )

## ggplot method
# look at how the values are distributed 
hist(nyc_COVID_sf$PERCENT_POSITIVE,
     breaks = 10,        
     col = "blue",       
     main = "Histogram", 
     xlab = "PERCENT_POSITIVE", 
     ylab = "Frequency" 
)

# First create the quantile breaks
breaks_qt <- classIntervals(c(min(nyc_COVID_sf$PERCENT_POSITIVE) - .00001,nyc_COVID_sf$PERCENT_POSITIVE), n = 5, style = "quantile")

# Use cut to divide percent postive into intervals
nyc_COVID_sf <- mutate(nyc_COVID_sf, 
                       nyc_COVID_cut = cut(PERCENT_POSITIVE, breaks_qt$brks, dig.lab = 4, digits=1)) 

# Plot results

ggplot(nyc_COVID_sf) + 
  geom_sf(aes(fill = nyc_COVID_cut)) +
  scale_fill_brewer(palette = "OrRd", name = "Percent Positive") +
  labs(x = 'Longitude', y = 'Latitude', title = 'COVID-19 Positive Cases in NYC', caption = 'Data Source: [NYC Department of Health and Mental Hygiene (DOHMH) ]') +
  theme_minimal() +
  theme(legend.position = "top", plot.title = element_text(size = 16, face = "bold"), axis.title = element_text(size = 14))

########################## MAP 1: Elderly Population #######################################
# Load the NYC census tracts from Department of Planning
nycCensus <- sf::st_read('./R-Spatial_II_Lab/geo_export_1dc7b645-647b-4806-b9a0-7b79660f120a.shp', stringsAsFactors = FALSE)

## Reading layer `geo_export_1dc7b645-647b-4806-b9a0-7b79660f120a' from data source `C:\Users\melin\Documents\GTECH78520\part3\R-Spatial_II_Lab\geo_export_1dc7b645-647b-4806-b9a0-7b79660f120a.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 2165 features and 11 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -74.25559 ymin: 40.49612 xmax: -73.70001 ymax: 40.91553
## Geodetic CRS:  WGS84(DD)

str(nycCensus)

## Classes 'sf' and 'data.frame':   2165 obs. of  12 variables:
##  $ boro_code : chr  "5" "1" "1" "1" ...
##  $ boro_ct201: chr  "5000900" "1009800" "1010000" "1010200" ...
##  $ boro_name : chr  "Staten Island" "Manhattan" "Manhattan" "Manhattan" ...
##  $ cdeligibil: chr  "E" "I" "I" "I" ...
##  $ ct2010    : chr  "000900" "009800" "010000" "010200" ...
##  $ ctlabel   : chr  "9" "98" "100" "102" ...
##  $ ntacode   : chr  "SI22" "MN19" "MN19" "MN17" ...
##  $ ntaname   : chr  "West New Brighton-New Brighton-St. George" "Turtle Bay-East Midtown" "Turtle Bay-East Midtown" "Midtown-Midtown South" ...
##  $ puma      : chr  "3903" "3808" "3808" "3807" ...
##  $ shape_area: num  2497010 1906016 1860938 1860993 1864600 ...
##  $ shape_leng: num  7729 5534 5692 5688 5693 ...
##  $ geometry  :sfc_MULTIPOLYGON of length 2165; first list element: List of 1
##   ..$ :List of 1
##   .. ..$ : num [1:28, 1:2] -74.1 -74.1 -74.1 -74.1 -74.1 ...
##   ..- attr(*, "class")= chr [1:3] "XY" "MULTIPOLYGON" "sfg"
##  - attr(*, "sf_column")= chr "geometry"
##  - attr(*, "agr")= Factor w/ 3 levels "constant","aggregate",..: NA NA NA NA NA NA NA NA NA NA ...
##   ..- attr(*, "names")= chr [1:11] "boro_code" "boro_ct201" "boro_name" "cdeligibil" ...

# does not use the standard County FIPS, but boro code
nycCensus %<>% dplyr::mutate(cntyFIPS = case_when(
  boro_name == 'Bronx' ~ '005',
  boro_name == 'Brooklyn' ~ '047',
  boro_name == 'Manhattan' ~ '061',
  boro_name == 'Queens' ~ '081',
  boro_name == 'Staten Island' ~ '085'),
  tractFIPS = paste(cntyFIPS, ct2010, sep='')
)

# Load the ACS data from CENSUS website
acsData <- readLines("./R-Spatial_II_Lab/ACSDP5Y2018.DP05_data_with_overlays_2020-04-22T132935.csv") %>%
  magrittr::extract(-2) %>% 
  textConnection() %>%
  read.csv(header=TRUE, quote= "\"") %>%
  dplyr::select(GEO_ID, 
                totPop = DP05_0001E, elderlyPop = DP05_0024E) %>%
  dplyr::mutate(censusCode = stringr::str_sub(GEO_ID, -9,-1));

# now extract columns 1 through 10 from the acsData data frame
acsData %>%
  magrittr::extract(1:10,)

##                  GEO_ID totPop elderlyPop censusCode
## 1  1400000US36005000100   7080         51  005000100
## 2  1400000US36005000200   4542        950  005000200
## 3  1400000US36005000400   5634        710  005000400
## 4  1400000US36005001600   5917        989  005001600
## 5  1400000US36005001900   2765         76  005001900
## 6  1400000US36005002000   9409        977  005002000
## 7  1400000US36005002300   4600        648  005002300
## 8  1400000US36005002400    172          0  005002400
## 9  1400000US36005002500   5887        548  005002500
## 10 1400000US36005002701   2868        243  005002701

# Join ACS data to the census tracts
popData <- merge(nycCensus, acsData, by.x ='tractFIPS', by.y = 'censusCode')

# Transform popData data to match the CRS of zipcode data
popNYC <- sf::st_transform(popData, st_crs(nyc_shapefile)) 


# Now aggregate to the zip code level
PopZipNYC <- sf::st_join(nyc_shapefile, 
                              popNYC %>% sf::st_centroid(),
                              join = st_contains) %>% 
  group_by(ZIPCODE, PO_NAME, POPULATION, COUNTY) %>% 
  summarise(totPop = sum(totPop),
            elderlyPop = sum(elderlyPop)
            )

## Warning: st_centroid assumes attributes are constant over geometries

## `summarise()` has grouped output by 'ZIPCODE', 'PO_NAME', 'POPULATION'. You can
## override using the `.groups` argument.

PopZipNYC %>% head()

## Simple feature collection with 6 features and 6 fields
## Geometry type: GEOMETRY
## Dimension:     XY
## Bounding box:  xmin: 971132.6 ymin: 188447.3 xmax: 998309.7 ymax: 230942.5
## Projected CRS: NAD83 / New York Long Island (ftUS)
## # A tibble: 6 × 7
## # Groups:   ZIPCODE, PO_NAME, POPULATION [6]
##   ZIPCODE PO_NAME      POPULAT…¹ COUNTY totPop elder…²                  geometry
##   <chr>   <chr>            <dbl> <chr>   <int>   <int> <GEOMETRY [US_survey_foo>
## 1 00083   Central Park        25 New Y…      3       0 POLYGON ((998309.7 22961…
## 2 10001   New York         22413 New Y…  19146    2500 POLYGON ((981958.6 21346…
## 3 10002   New York         81305 New Y…  74310   15815 POLYGON ((991339.9 20757…
## 4 10003   New York         55878 New Y…  53487    6296 POLYGON ((989830.5 20704…
## 5 10004   New York          2187 New Y…     NA      NA MULTIPOLYGON (((977503.7…
## 6 10005   New York          8107 New Y…   8809     209 POLYGON ((982595.7 19588…
## # … with abbreviated variable names ¹POPULATION, ²elderlyPop

# See where we have those NAs 
PopZipNYC %>% dplyr::filter(is.na(totPop))

## Simple feature collection with 68 features and 6 fields
## Geometry type: GEOMETRY
## Dimension:     XY
## Bounding box:  xmin: 971132.6 ymin: 151085.5 xmax: 1067494 ymax: 263362
## Projected CRS: NAD83 / New York Long Island (ftUS)
## # A tibble: 68 × 7
## # Groups:   ZIPCODE, PO_NAME, POPULATION [67]
##    ZIPCODE PO_NAME  POPULATION COUNTY   totPop elder…¹                  geometry
##  * <chr>   <chr>         <dbl> <chr>     <int>   <int> <GEOMETRY [US_survey_foo>
##  1 10004   New York       2187 New York     NA      NA MULTIPOLYGON (((977503.7…
##  2 10020   New York          0 New York     NA      NA POLYGON ((990566.4 21563…
##  3 10041   New York          0 New York     NA      NA POLYGON ((981406.8 19545…
##  4 10043   New York          0 New York     NA      NA POLYGON ((982286 195715.…
##  5 10045   New York          0 New York     NA      NA POLYGON ((981726 197499.…
##  6 10047   New York          0 New York     NA      NA POLYGON ((988037.5 20872…
##  7 10048   New York          0 New York     NA      NA POLYGON ((981026.8 19786…
##  8 10055   New York         12 New York     NA      NA POLYGON ((991432.1 21589…
##  9 10075   New York      25203 New York     NA      NA POLYGON ((998799 219864.…
## 10 10080   New York          0 New York     NA      NA POLYGON ((980680.1 19813…
## # … with 58 more rows, and abbreviated variable name ¹elderlyPop

## ggplot method
# Plot results
ggplot() +
  geom_sf(data = PopZipNYC, aes(fill = elderlyPop)) +
  scale_fill_gradient(name = 'Elderly Population', low = 'white', high = 'red') +
  labs(x = 'Longitude', y = 'Latitude', title = 'Elderly Population in NYC', caption = 'Data Source: [CENSUS website]') +
  theme_minimal() +
  theme( plot.title = element_text(size = 16, face = "bold"), axis.title = element_text(size = 14))

task 2: Use ggplot2 and other ggplot-compatible packages to create a multi-map figure illustrating the possible relationship between COVID-19 confirmed cases or rate and another factor (e.g., the number of nursing homes, number of food stores, neighborhood racial composition, elderly population, etc.). The maps should be put side by side on one single page. Add graticule to at least one of those maps and label some of the feature on the map where applicable and appropriate.

#join covid and population data
joined_data <- st_join(nyc_COVID_sf, PopZipNYC, by = "ZIPCODE", join = st_equals)


# Create the map for COVID-19 confirmed cases
map_covid <- ggplot() +
  geom_sf(data = joined_data, aes(fill = PERCENT_POSITIVE), color = 'gray') +
  scale_fill_gradient(low = "white", high = "red") +
  labs(title = 'COVID-19 Confirmed Cases') +
  theme_minimal() +
  theme(legend.position = "top", plot.title = element_text(size = 16, face = "bold"), axis.title = element_text(size = 14))

# Create the map for the elderly population
map_elderly <- ggplot() +
  geom_sf(data = joined_data, aes(fill = elderlyPop), color = 'gray') +
  scale_fill_gradient(low = "white", high = "blue") +
  labs(title = 'Elderly Population in NYC') +
  theme_minimal() +
  theme(legend.position = "top", plot.title = element_text(size = 16, face = "bold"), axis.title = element_text(size = 14))

# Arrange the maps side by side on a single page
multi_map_figure <- map_covid + map_elderly + plot_layout(ncol = 2)

# Display the multi-map figure
multi_map_figure

task 3: Create a web-based interactive map for COIVD-19 data using tmap, mapview, or leaflet package and save it as a HTML file.

#mapview

# Convert the joined dataset to an sf object
joined_sf <- st_as_sf(joined_data, coords = c("lon", "lat"), crs = 4326)

# Create the interactive map using mapview
covid_map <- mapview(joined_sf,
                     zcol = c("PERCENT_POSITIVE", "elderlyPop"),
                     legend = TRUE,
                     map.types = c("OpenStreetMap", "Esri.WorldImagery"),
                     col.regions = list(colorRampPalette(c("lightblue", "darkblue")),
                                        colorRampPalette(c("lightgreen", "darkgreen"))),
                     stroke.width = 0.5)

 

# Display the interactive map
covid_map

# Save the interactive map as an HTML file

mapview::mapshot(covid_map, 'covid_map_mapview.html')

#leaflet

# reproject, if needed
joined_data <- st_transform(joined_data, 4326)


pal_fun <- colorQuantile("YlOrRd", NULL, n = 5)

p_tip <- paste0("GEOID: ", joined_data$GEOID);

p_popup <- paste0("<strong>Noise Complaint Density: </strong>", 
                  joined_data$elderlyPop%>%round(3)%>%format(nsmall = 3), 
                  " /sqkm",
                  " <br/>",
                 
                  sep="")

leaflet(joined_data) %>%
  addPolygons(
    stroke = FALSE, 
    fillColor = ~pal_fun(elderlyPop),
    fillOpacity = 0.8, smoothFactor = 0.5, 
    label = p_tip,
    popup = p_popup)   %>%
  addTiles()%>%
  addLegend("bottomright",  
            pal=pal_fun,   
            values=~elderlyPop,  
            title = 'elderly Population <br> per sqkm')

GTECH_78520_2023s_week11_Melissa.linares03

Melissa Linares

5/15/2023

Load packages

task 3: Create a web-based interactive map for COIVD-19 data using tmap, mapview, or leaflet package and save it as a HTML file.