require(vegan)
## Loading required package: vegan
## Warning: package 'vegan' was built under R version 4.4.3
## Loading required package: permute
## Warning: package 'permute' was built under R version 4.4.3
## Loading required package: lattice
require(tidyverse)
## Loading required package: tidyverse
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.0.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
require(sf)
## Loading required package: sf
## Warning: package 'sf' was built under R version 4.4.3
## Linking to GEOS 3.13.0, GDAL 3.10.1, PROJ 9.5.1; sf_use_s2() is TRUE
require(mapview)
## Loading required package: mapview
## Warning: package 'mapview' was built under R version 4.4.3
require(magrittr)
## Loading required package: magrittr
##
## Attaching package: 'magrittr'
##
## The following object is masked from 'package:purrr':
##
## set_names
##
## The following object is masked from 'package:tidyr':
##
## extract
library(dplyr)
library(ggplot2)
#Reading soil data for each borough, ensuring a uniform corrdinate reference system
bronxsoil<-st_read("NY005bronx/spatial/soilmu_a_ny005.shp")
## Reading layer `soilmu_a_ny005' from data source
## `C:\Users\Kami\Downloads\soil R project\NY005bronx\spatial\soilmu_a_ny005.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 1407 features and 4 fields
## Geometry type: POLYGON
## Dimension: XY
## Bounding box: xmin: -73.9344 ymin: 40.7854 xmax: -73.74844 ymax: 40.91754
## Geodetic CRS: WGS 84
bronxsoil_crs<-st_transform(bronxsoil,4326)
view(bronxsoil_crs)
queenssoil<-st_read("NY081queens/spatial/soilmu_a_ny081.shp")
## Reading layer `soilmu_a_ny081' from data source
## `C:\Users\Kami\Downloads\soil R project\NY081queens\spatial\soilmu_a_ny081.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 2540 features and 4 fields
## Geometry type: POLYGON
## Dimension: XY
## Bounding box: xmin: -74.04372 ymin: 40.50893 xmax: -73.70021 ymax: 40.81217
## Geodetic CRS: WGS 84
queenssoil_crs<-st_transform(queenssoil,4326)
manhattansoil<-st_read("NY061newyork/spatial/soilmu_a_ny061.shp")
## Reading layer `soilmu_a_ny061' from data source
## `C:\Users\Kami\Downloads\soil R project\NY061newyork\spatial\soilmu_a_ny061.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 636 features and 4 fields
## Geometry type: POLYGON
## Dimension: XY
## Bounding box: xmin: -74.03539 ymin: 40.68086 xmax: -73.90672 ymax: 40.88193
## Geodetic CRS: WGS 84
manhattansoil_crs<-st_transform(manhattansoil,4326)
statensoil<-st_read("NY085richmond/spatial/soilmu_a_ny085.shp")
## Reading layer `soilmu_a_ny085' from data source
## `C:\Users\Kami\Downloads\soil R project\NY085richmond\spatial\soilmu_a_ny085.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 2364 features and 4 fields
## Geometry type: POLYGON
## Dimension: XY
## Bounding box: xmin: -74.25897 ymin: 40.47664 xmax: -74.03463 ymax: 40.6515
## Geodetic CRS: WGS 84
statensoil_crs<-st_transform(statensoil,4326)
brooklynsoil<-st_read("NY047kings/spatial/soilmu_a_ny047.shp")
## Reading layer `soilmu_a_ny047' from data source
## `C:\Users\Kami\Downloads\soil R project\NY047kings\spatial\soilmu_a_ny047.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 1106 features and 4 fields
## Geometry type: POLYGON
## Dimension: XY
## Bounding box: xmin: -74.05669 ymin: 40.55034 xmax: -73.83292 ymax: 40.73936
## Geodetic CRS: WGS 84
brooklynsoil_crs<-st_transform(brooklynsoil,4326)
# vector of fips code and borough names
borough_names <- c(
"NY005" = "Bronx",
"NY047" = "Brooklyn",
"NY061" = "Manhattan",
"NY081" = "Queens",
"NY085" = "Staten Island"
)
#renaming the fips code to common borough names
bronxsoil_crs <- bronxsoil_crs %>%
mutate(AREASYMBOL = recode(AREASYMBOL, !!!borough_names))
queenssoil_crs <- queenssoil_crs %>%
mutate(AREASYMBOL = recode(AREASYMBOL, !!!borough_names))
manhattansoil_crs <- manhattansoil_crs %>%
mutate(AREASYMBOL = recode(AREASYMBOL, !!!borough_names))
statensoil_crs <- statensoil_crs %>%
mutate(AREASYMBOL = recode(AREASYMBOL, !!!borough_names))
brooklynsoil_crs <- brooklynsoil_crs %>%
mutate(AREASYMBOL = recode(AREASYMBOL, !!!borough_names))
# making them validd
bronx_valid <- st_make_valid(bronxsoil_crs)
queens_valid <- st_make_valid(queenssoil_crs)
manhattan_valid <- st_make_valid(manhattansoil_crs)
staten_valid <- st_make_valid(statensoil_crs)
brooklyn_valid <- st_make_valid(brooklynsoil_crs)
# combining data using rbind function
bronx_queens <- rbind(bronx_valid, queens_valid, manhattan_valid, staten_valid, brooklyn_valid)
#Examples of how the soil classes look in Queens and Bronx
plot(bronxsoil_crs["MUSYM"], main= "Soil Classes in the Bronx")
plot(queenssoil_crs["MUSYM"], main= "Soil Classes in Queens")
#visualizing combined data
plot(bronx_queens["MUSYM"])
# calculating area of each soil class and total soil area by borough
soilarea_boro <- bronx_queens %>%
mutate(soil_area = st_area(.)) %>% # core area calculation
st_drop_geometry() %>%
group_by(AREASYMBOL, MUSYM) %>%
summarize(total_soil_area = sum(soil_area, na.rm = TRUE)) %>% # this too
ungroup()
## `summarise()` has grouped output by 'AREASYMBOL'. You can override using the
## `.groups` argument.
# calculating Shannon-wiener index for each borough
shannon_borough<- soilarea_boro %>%
group_by(AREASYMBOL) %>%
summarize(shannon_diversity = diversity(total_soil_area, index = "shannon"))
# showing table/tibble
print("Shannon-Wiener Diversity Index per Borough:")
## [1] "Shannon-Wiener Diversity Index per Borough:"
print(shannon_borough)
## # A tibble: 5 × 2
## AREASYMBOL shannon_diversity
## <chr> <dbl>
## 1 Bronx 2.99
## 2 Brooklyn 2.94
## 3 Manhattan 2.74
## 4 Queens 3.20
## 5 Staten Island 2.79
# making Choropleth map
borough_divmap <- bronx_queens %>%
left_join(shannon_borough, by = "AREASYMBOL")
ggplot(borough_divmap) +
geom_sf(aes(fill = shannon_diversity)) +
scale_fill_viridis_c(option = "viridis", name = "Shannon Diversity") +
labs(title = "Shannon-Wiener Soil Diversity Index in NYC Boroughs") +
theme_minimal()
# simpsons index calculation
simpson_borough <- soilarea_boro %>%
group_by(AREASYMBOL) %>%
summarize(simpson_diversity = diversity(total_soil_area, index = "simpson"))
# showing table/tibble for simpson
print("Simpson's Diversity Index per Borough:")
## [1] "Simpson's Diversity Index per Borough:"
print(simpson_borough)
## # A tibble: 5 × 2
## AREASYMBOL simpson_diversity
## <chr> <dbl>
## 1 Bronx 0.883
## 2 Brooklyn 0.899
## 3 Manhattan 0.874
## 4 Queens 0.903
## 5 Staten Island 0.789
#
borough_divmap_simpson <- borough_divmap %>%
left_join(simpson_borough, by = "AREASYMBOL")
ggplot(borough_divmap_simpson) +
geom_sf(aes(fill = simpson_diversity)) +
scale_fill_viridis_c(option = "cividis", name = "Simpson Diversity") +
labs(title = "Simpson Soil Diversity Index in NYC Boroughs") +
theme_minimal()
## Visualizing Simpson’s and Shannon-Wiener Soil Diversity Index per
Census Block
# uploading census blocks
census_blks <- "C:/Users/Kami/Downloads/soil R project/nycb2020wi_25a/nycb2020wi_25a/nycb2020wi.shp"
census_blocks <- st_read(census_blks)
## Reading layer `nycb2020wi' from data source
## `C:\Users\Kami\Downloads\soil R project\nycb2020wi_25a\nycb2020wi_25a\nycb2020wi.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 37984 features and 8 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: 912287.1 ymin: 113279.3 xmax: 1067383 ymax: 273617.8
## Projected CRS: NAD83 / New York Long Island (ftUS)
blocks_transform <- st_transform(census_blocks, crs = st_crs(bronx_queens)) # uniform crs
blocksrvalid<-st_make_valid(blocks_transform)
# spatial join
soil_block <- st_join(blocksrvalid, bronx_queens, join = st_intersects)
soil_areablock <- soil_block %>%
mutate(soil_area = st_area(.)) %>%
st_drop_geometry() %>%
group_by(GEOID, AREASYMBOL, MUSYM) %>% # Keep AREASYMBOL here
summarize(tot_soilblock = sum(soil_area, na.rm = TRUE)) %>%
ungroup()
## `summarise()` has grouped output by 'GEOID', 'AREASYMBOL'. You can override
## using the `.groups` argument.
# shannons diversity per census block
shannon_block <- soil_areablock %>%
group_by(GEOID, AREASYMBOL) %>% # Group by borough as well
summarize(shannon_diversity = diversity(tot_soilblock, index = "shannon")) %>%
rename(Borough = AREASYMBOL) # Rename for clarity
## `summarise()` has grouped output by 'GEOID'. You can override using the
## `.groups` argument.
# simpsons diversity per census block
simpson_block <- soil_areablock %>%
group_by(GEOID, AREASYMBOL) %>% # Group by borough as well
summarize(simpson_diversity = diversity(tot_soilblock, index = "simpson")) %>%
rename(Borough = AREASYMBOL) # Rename for clarity
## `summarise()` has grouped output by 'GEOID'. You can override using the
## `.groups` argument.
# joining indices to census blocks
census_diversity_block <- blocks_transform %>%
left_join(shannon_block, by = "GEOID") %>%
left_join(simpson_block, by = "GEOID") %>%
replace_na(list(shannon_diversity = 0, simpson_diversity = 0)) # Handle blocks with no soil data
## Warning in sf_column %in% names(g): Detected an unexpected many-to-many relationship between `x` and `y`.
## ℹ Row 21 of `x` matches multiple rows in `y`.
## ℹ Row 15324 of `y` matches multiple rows in `x`.
## ℹ If a many-to-many relationship is expected, set `relationship =
## "many-to-many"` to silence this warning.
# Making choropleth map of Shannon per cb
ggplot(census_diversity_block) +
geom_sf(aes(fill = shannon_diversity), color = NA) +
scale_fill_viridis_c(option = "viridis", name = "Shannon Diversity") +
labs(title = "Shannon-Wiener Soil Diversity Index per Census Block") +
theme_minimal()
# Making choropleth map of Simpson per cb
ggplot(census_diversity_block) +
geom_sf(aes(fill = simpson_diversity), color = NA) +
scale_fill_viridis_c(option = "viridis", name = "Simpson Diversity") +
labs(title = "Simpson Soil Diversity Index per Census Block") +
theme_minimal()
#just to see the max and min
print("Shannon's Diversity Index")
## [1] "Shannon's Diversity Index"
summary(census_diversity_block$shannon_diversity)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0000 0.0000 0.6931 0.5326 0.6931 3.2916
print("Simpson's Diversity Index")
## [1] "Simpson's Diversity Index"
summary(census_diversity_block$simpson_diversity)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0000 0.0000 0.5000 0.3416 0.5000 0.9568
diversity_longg <- shannon_borough %>%
rename(Shannon = shannon_diversity) %>%
left_join(simpson_borough %>% rename(Simpson = simpson_diversity), by = "AREASYMBOL") %>%
pivot_longer(cols = c(Shannon, Simpson), names_to = "Index", values_to = "Value")
# barplot
ggplot(diversity_longg, aes(x = AREASYMBOL, y = Value, fill = Index)) +
geom_bar(stat = "identity", position = "dodge") +
labs(title = "Comparison of Soil Diversity Indices of NYC Boroughs",
x = "AREASYMBOL", y = "Index Value") +
theme_minimal() +
scale_fill_viridis_d()
#finding num of unique soils per boro
unique_soils <- bronx_queens %>%
group_by(AREASYMBOL) %>%
summarize(unique_soil_count = n_distinct(MUSYM))
# showing results
print("Number of Unique Soil Classes per Borough:")
## [1] "Number of Unique Soil Classes per Borough:"
print(unique_soils)
## Simple feature collection with 5 features and 2 fields
## Geometry type: POLYGON
## Dimension: XY
## Bounding box: xmin: -74.25897 ymin: 40.47664 xmax: -73.70021 ymax: 40.91754
## Geodetic CRS: WGS 84
## # A tibble: 5 × 3
## AREASYMBOL unique_soil_count geometry
## <chr> <int> <POLYGON [°]>
## 1 Bronx 126 ((-73.86299 40.90147, -73.86632 40.90242, -73…
## 2 Brooklyn 117 ((-73.83532 40.60548, -73.83423 40.60677, -73…
## 3 Manhattan 69 ((-73.9071 40.87624, -73.90738 40.87631, -73.…
## 4 Queens 145 ((-73.72447 40.72449, -73.72346 40.72481, -73…
## 5 Staten Island 138 ((-74.15793 40.64365, -74.16142 40.64403, -74…
# calculating area of each soil class overall
soilarea_overall <- bronx_queens %>%
mutate(soil_area = st_area(.)) %>%
st_drop_geometry() %>%
group_by(MUSYM) %>%
summarize(total_area = sum(soil_area, na.rm = TRUE)) %>%
ungroup()
# calculating the total soil area in all nyc
total_area_summary <- soilarea_overall %>%
summarize(grnd_total_area = sum(total_area, na.rm = TRUE))
# getting the grand total area value
grnd_total_area <- total_area_summary$grnd_total_area
# Calculate percent coverage and keeping top 10
percent_coverall <- soilarea_overall %>%
mutate(percent_cover = (total_area / grnd_total_area) * 100) %>%
arrange(desc(percent_cover)) %>% # Arrange by percent coverage in descending order
top_n(10, percent_cover) # Select the top 10
# resulting table
print("Percent Coverage (in meters squared) of Top 10 Soil Classes in NYC:")
## [1] "Percent Coverage (in meters squared) of Top 10 Soil Classes in NYC:"
print(percent_coverall)
## # A tibble: 10 × 3
## MUSYM total_area percent_cover
## <chr> [m^2] [1]
## 1 W 336509647. 29.4
## 2 UFA 109008249. 9.52
## 3 UGB 72478070. 6.33
## 4 UGA 69461946. 6.07
## 5 UtA 60007200. 5.24
## 6 UoA 43492637. 3.80
## 7 UtB 36476694. 3.19
## 8 UmA 34470520. 3.01
## 9 UGBl 26854370. 2.35
## 10 Nag2 23149300. 2.02
# soil data into census blocks
soil_block <- st_join(blocksrvalid, bronx_queens, join = st_intersects)
# upload park data
parks_upload <- "C:/Users/Kami/Downloads/soil R project/Parks Properties_20250514/geo_export_1e52c339-338f-41fa-bbc2-2acaf4b9e25f.shp"
parks <- st_read(parks_upload)
## Reading layer `geo_export_1e52c339-338f-41fa-bbc2-2acaf4b9e25f' from data source `C:\Users\Kami\Downloads\soil R project\Parks Properties_20250514\geo_export_1e52c339-338f-41fa-bbc2-2acaf4b9e25f.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 2054 features and 34 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: -74.25609 ymin: 40.49449 xmax: -73.70905 ymax: 40.91133
## Geodetic CRS: WGS 84
parks_transform <- st_transform(parks, crs = st_crs(blocks_transform)) # uniform crs
# park geometries are valid
parks_valid <- st_make_valid(parks_transform)
parks_centroids <- st_centroid(parks_valid)
## Warning: st_centroid assumes attributes are constant over geometries
#spatial join wasnt working so i resorted to this
intersection <- st_intersects(blocksrvalid, parks_centroids)
parkslist <- lengths(intersection)
parkblocks <- tibble(
GEOID = blocksrvalid$GEOID,
n_parks = parkslist
)
# joining park count to the census div data
census_divparks <- census_diversity_block %>%
left_join(parkblocks, by = "GEOID") %>%
replace_na(list(n_parks = 0))
# creating categories for shannon diversity using quantiles
census_divparks <- census_divparks %>%
mutate(shannon_cat = case_when(
shannon_diversity <= quantile(shannon_diversity, 1/3, na.rm = TRUE) ~ "Low",
shannon_diversity <= quantile(shannon_diversity, 2/3, na.rm = TRUE) ~ "Medium",
TRUE ~ "High"
))
# creating categories of park density using quantiles again
census_divparks <- census_divparks %>%
mutate(parks_cat = case_when(
n_parks <= quantile(n_parks, 1/3, na.rm = TRUE) ~ "Low",
n_parks <= quantile(n_parks, 2/3, na.rm = TRUE) ~ "Medium",
TRUE ~ "High"
))
# combining/creating a bivariate categories
census_divparks <- census_divparks %>%
mutate(bivariate_cat = interaction(shannon_cat, parks_cat, sep = "-"))
bivariate_colors_yg <- c(
"Low-Low" = "#ffffcc",
"Medium-Low" = "#d9f0a3",
"High-Low" = "#addd8e",
"Low-Medium" = "#f7fcb9",
"Medium-Medium" = "#d9f0a3",
"High-Medium" = "#74c476",
"Low-High" = "#addd8e",
"Medium-High" = "#41ab5d",
"High-High" = "#238443"
)
# final bivariate map
print(ggplot() +
geom_sf(data = census_divparks, aes(fill = bivariate_cat), color = NA) +
scale_fill_manual(values = bivariate_colors_yg, name = "Shannon to Parks") +
labs(
title = "Bivariate Map of Shannon Diversity and Park Density in NYC per Census Block",
fill = "Shannon - Parks (Low-Med-High)"
) +
theme_minimal())