Final Project - Earthworms Capstone Figures

##Downloads

require(tidyverse); require(magrittr);
require(sp);require(sf);
require(classInt);require(RColorBrewer);
require(ggplot2);require(ggmap);require(leaflet);
require(ggrepel);
library(ggpubr); library(devtools);
require(htmlwidgets); require(mapview); require(gt)

setwd("~/Desktop/spring 2025/GTECH 385/Final Project")

Week 1: Reading in the data & data cleanup

#reading in the ignition data (tells us the organic matter content of the soils)

ig_loss <- readr::read_csv("Ignition_Loss.csv", lazy = FALSE)

## Rows: 108 Columns: 12
## ── Column specification ────────────────────────────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): Sample_ID, Plot Type, Depth, Date
## dbl (7): Crucible_No, Crucible_Mass, Crucible_Soil, Soil_Initial, Final_Crucible_Soil, Final_Soil, Loss
## lgl (1): Org_Mat
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

str(ig_loss)

## spc_tbl_ [108 × 12] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ Sample_ID          : chr [1:108] "BL1" "BL1" "BL2" "BL2" ...
##  $ Plot Type          : chr [1:108] NA NA NA NA ...
##  $ Depth              : chr [1:108] "A" "B" "A" "B" ...
##  $ Crucible_No        : num [1:108] 1 2 3 4 5 6 7 8 9 10 ...
##  $ Crucible_Mass      : num [1:108] 42.1 47.2 38.2 39 38.3 ...
##  $ Crucible_Soil      : num [1:108] 44.6 50.1 40.5 41.6 40.4 ...
##  $ Soil_Initial       : num [1:108] 2.53 2.89 2.31 2.63 2.12 ...
##  $ Final_Crucible_Soil: num [1:108] 44.2 49.9 40.2 41.4 40.1 ...
##  $ Final_Soil         : num [1:108] 2.18 2.67 1.97 2.37 1.84 ...
##  $ Loss               : num [1:108] 0.348 0.218 0.339 0.254 0.275 0.211 0.379 0.428 0.287 0.202 ...
##  $ Org_Mat            : logi [1:108] NA NA NA NA NA NA ...
##  $ Date               : chr [1:108] "2/10/25" "2/10/25" "2/10/25" "2/10/25" ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   Sample_ID = col_character(),
##   ..   `Plot Type` = col_character(),
##   ..   Depth = col_character(),
##   ..   Crucible_No = col_double(),
##   ..   Crucible_Mass = col_double(),
##   ..   Crucible_Soil = col_double(),
##   ..   Soil_Initial = col_double(),
##   ..   Final_Crucible_Soil = col_double(),
##   ..   Final_Soil = col_double(),
##   ..   Loss = col_double(),
##   ..   Org_Mat = col_logical(),
##   ..   Date = col_character()
##   .. )
##  - attr(*, "problems")=<externalptr>

#reading in the C:N data (tells us about carbon versus nitrogen content of the soils)

cn_ratio <- readr::read_csv("Leaf_LitterCN.csv", lazy = FALSE)

## Rows: 54 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): Sorted, Name, Sample Month
## dbl (5): Run_No, Weight, Perc_N, Perc_C, C_N_Ratio
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

str(cn_ratio)

## spc_tbl_ [54 × 8] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ Sorted      : chr [1:54] "Y" "Y" "N" "N" ...
##  $ Run_No      : num [1:54] 1 1 1 1 1 1 1 1 1 1 ...
##  $ Weight      : num [1:54] 250 249 252 251 251 ...
##  $ Name        : chr [1:54] "BL1" "BL1" "BL2" "BL2" ...
##  $ Sample Month: chr [1:54] "June" "June" "June" "June" ...
##  $ Perc_N      : num [1:54] 1.26 1.25 1.8 1.76 1.58 ...
##  $ Perc_C      : num [1:54] 45.5 45.6 39.8 39.1 42.3 ...
##  $ C_N_Ratio   : num [1:54] 36.2 36.6 22.1 22.2 26.8 ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   Sorted = col_character(),
##   ..   Run_No = col_double(),
##   ..   Weight = col_double(),
##   ..   Name = col_character(),
##   ..   `Sample Month` = col_character(),
##   ..   Perc_N = col_double(),
##   ..   Perc_C = col_double(),
##   ..   C_N_Ratio = col_double()
##   .. )
##  - attr(*, "problems")=<externalptr>

#reading in the soil pH data 

soil_pH <- readr::read_csv("SoilpH.csv", lazy = FALSE)

## Rows: 36 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────────────────────────────
## Delimiter: ","
## chr (7): SampleID, Type, Plot, Depth, Forest_Type, Tube, Date
## dbl (2): Soil_Mass, pH
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

str(soil_pH)

## spc_tbl_ [36 × 9] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ SampleID   : chr [1:36] "B1_1" "B1_1" "B1_2" "B1_2" ...
##  $ Type       : chr [1:36] "Field" "Field" "Field" "Field" ...
##  $ Plot       : chr [1:36] "BL1" "BL1" "BL1" "BL1" ...
##  $ Depth      : chr [1:36] "0 to 5" "0 to 5" "5 to 10" "5 to 10" ...
##  $ Forest_Type: chr [1:36] "Black Locust" "Black Locust" "Black Locust" "Black Locust" ...
##  $ Tube       : chr [1:36] "4-1" "4-2" "11-1" "11-2" ...
##  $ Soil_Mass  : num [1:36] 15 15 15 15 15 ...
##  $ Date       : chr [1:36] "10/11/24" "10/11/24" "10/11/24" "10/11/24" ...
##  $ pH         : num [1:36] 5.8 5.39 5.37 5.21 5.73 5.68 8.2 8.43 6.03 6.33 ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   SampleID = col_character(),
##   ..   Type = col_character(),
##   ..   Plot = col_character(),
##   ..   Depth = col_character(),
##   ..   Forest_Type = col_character(),
##   ..   Tube = col_character(),
##   ..   Soil_Mass = col_double(),
##   ..   Date = col_character(),
##   ..   pH = col_double()
##   .. )
##  - attr(*, "problems")=<externalptr>

#creating individual dataframes for each forest stand (3 total, Black Locust, Native Forest, and Norway Maple) with consistent naming conventions. All new data frames below will have a column labeled "Forest_Type" that will use the same label for the forest stand in question.

library(magrittr)
library(dplyr)

#######################Black Locust data######################## 

#pH
BL_pH <- soil_pH[which(soil_pH$Forest_Type=="Black Locust"),]

#CN 
BL_lab_cn <- cn_ratio %>% select(Name) %>% mutate(Forest_Type=case_when(str_detect(Name,'^B')==T ~ 'Black Locust',TRUE ~ "N")) 
BL_lab_cn <- cbind(BL_lab_cn,cn_ratio)
BL_CN <- BL_lab_cn[which(BL_lab_cn$Forest_Type=="Black Locust"),]

#ignition loss (using the preliminary data right now, some more observations need to be added)
BL_lab_ig <- ig_loss %>% select(Sample_ID) %>% mutate(Forest_Type=case_when(str_detect(Sample_ID,'^BL')==T ~ 'Black Locust',TRUE ~ "N"))
BL_lab_ig <- cbind(BL_lab_ig,ig_loss)
BL_ig <- BL_lab_ig[which(BL_lab_ig$Forest_Type=="Black Locust"),]

# All BL data combined. I need to use merge because of different row length in the CN data), so I begin by using cbind for the ig and pH data, which have the same number of rows. This is because the merge function only takes two data frames at a time (I got an error when I tried to do all three at once.)

BL_pHandig <-cbind(BL_pH,BL_ig)
BL_data <- merge(data.frame(BL_pHandig, row.names=NULL), data.frame(BL_CN, row.names=NULL), by=0, all=TRUE) [-1] 

########################Native Forest pH data############################ 

#pH
NF_pH <- soil_pH[which(soil_pH$Forest_Type=="Native Forest"),]

#CN 
NF_lab_cn <- cn_ratio %>% select(Name) %>% mutate(Forest_Type=case_when(str_detect(Name,'^NF')==T ~ 'Native Forest',TRUE ~ "N")) 
NF_lab_cn <- cbind(NF_lab_cn,cn_ratio)
NF_CN <- NF_lab_cn[which(NF_lab_cn$Forest_Type=="Native Forest"),]

#ignition loss (using the preliminary data right now, some more observations need to be added)
NF_lab_ig <- ig_loss %>% select(Sample_ID) %>% mutate(Forest_Type=case_when(str_detect(Sample_ID,'^NF')==T ~ 'Native Forest',TRUE ~ "N"))
NF_lab_ig <- cbind(NF_lab_ig,ig_loss)
NF_ig <- NF_lab_ig[which(NF_lab_ig$Forest_Type=="Native Forest"),]

# All NF data combined. Here I can just use cbind because the three data frames have the same number of rows.
NF_data <- cbind(NF_CN,NF_ig,NF_pH)

#########################Norway Maple pH data###########################

#pH
NM_pH <- soil_pH[which(soil_pH$Forest_Type=="Norway Maple"),]

#CN 
NM_lab_cn <- cn_ratio %>% select(Name) %>% mutate(Forest_Type=case_when(str_detect(Name,'^NM')==T ~ 'Norway Maple',TRUE ~ "N")) 

NM_lab_cn2 <- cn_ratio %>% select(Name) %>% mutate(Forest_Type=case_when(str_detect(Name,'^No')==T ~ 'Norway Maple',TRUE ~ "N")) 

NM_lab_cn <- bind_rows(NM_lab_cn,NM_lab_cn2)

cn2 <- cn_ratio
cn2 <- bind_rows(cn2,cn_ratio)
NM_lab_cn <- cbind(NM_lab_cn,cn2)
NM_CN <- NM_lab_cn[which(NM_lab_cn$Forest_Type=="Norway Maple"),]

#ignition loss (using the preliminary data right now, some more observations need to be added)
NM_lab_ig <- ig_loss %>% select(Sample_ID) %>% mutate(Forest_Type=case_when(str_detect(Sample_ID,'^NM')==T ~ 'Norway Maple',TRUE ~ "N"))
NM_lab_ig <- cbind(NM_lab_ig,ig_loss)
NM_ig <- NM_lab_ig[which(NM_lab_ig$Forest_Type=="Norway Maple"),]

# All NM data combined. Here I have to use merge again because each data frame has a different number of rows. 
NM_dat1 <- merge(data.frame(NM_pH, row.names=NULL), data.frame(NM_CN, row.names=NULL), by=0, all=TRUE) [-1] 
NM_data <- merge(data.frame(NM_dat1, row.names=NULL), data.frame(NM_ig, row.names=NULL), by=0, all=TRUE) [-1] 

# Finally, I will create a data frame per category (i.e. all corrected pH data in one df, all CN together, etc.)

pH <- bind_rows(BL_pH,NF_pH,NM_pH)
CN <- bind_rows(BL_CN,NF_CN,NM_CN)

## New names:
## New names:
## New names:
## • `Name` -> `Name...1`
## • `Name` -> `Name...6`

ign <- bind_rows(BL_ig,NF_ig,NM_ig)

## New names:
## New names:
## New names:
## • `Sample_ID` -> `Sample_ID...1`
## • `Sample_ID` -> `Sample_ID...3`

#creating a data frame with all of the data frames combined. For this, I will have to use the merge function twice.

all_ver1 <- merge(data.frame(pH, row.names=NULL), data.frame(CN, row.names=NULL), by=0, all=TRUE) [-1] 
all_data <- merge (data.frame(all_ver1, row.names=NULL), data.frame(ign, row.names=NULL), by=0, all=TRUE) [-1] 

#Now, I have data frames containing data for each forest stand, all with a consistent label for each data type, making them easier to work with in my analysis for Week 2.

##Week 2: Boxplots, Range plots, and stat analyses

#########################CN boxplots (CN Ratio by dominant Forest Stand)############################

ggplot(CN,aes(as.factor(Forest_Type),C_N_Ratio))+geom_boxplot(col=c("red","darkgreen","blue"))+labs(x='Forest Type',y='C:N Ratio',title="Carbon Nitrogen (C:N) Ratios in Leaf Litter, by Forest Type")

##EDITS: assign colors to each Forest Type in the box plot, to be used in the range plot below

####################################Horizontal range plot for pH######################################### 
library(ggplot2)

#now I am finding the lowest and highest values for each forest stand pH, so I may create my range plot. I was having trouble doing it in a similar way to how I created the boxplot, I keep getting an error message. 

lowest_BL <- which.min(BL_pH$pH)
print(lowest_BL)

## [1] 4

#5.21 is the lowest pH in BL
highest_BL <- which.max(BL_pH$pH)
print(highest_BL)

## [1] 8

#8.43 is the highest pH in BL 
mean(BL_pH$pH)

## [1] 6.479167

#6.48 is the mean pH 

lowest_NF  <- which.min(NF_pH$pH)
print(lowest_NF)

## [1] 8

#4.04 is the lowest pH in NF
highest_NF <- which.max(NF_pH$pH)
print(highest_NF)

## [1] 11

#5.57 is the highest pH in NF
mean(NF_pH$pH)

## [1] 4.915

#4.92 is the mean pH

lowest_NM  <- which.min(NM_pH$pH)
print(lowest_NM)

## [1] 8

#4.46 is the lowest pH in NM
highest_NM <- which.max(NM_pH$pH)
print(highest_NM)

## [1] 11

#5.95 is the highest pH in NM
mean(NM_pH$pH)

## [1] 5.156667

#5.16 is the mean pH

install.packages(ggplot2)

## Error in install.packages : object 'ggplot2' not found

library(ggplot2)

par(mar = c(1, 19, 1, 19))
d=data.frame(Forest_Type=c("Black Locust","Native Forest","Norway Maple"), Soil_pH=c(6.48,4.92,5.16), lower=c(5.21,4.04,4.46), upper=c(8.43,5.57,5.95))
ggplot() + 
geom_pointrange(data=d, mapping=aes(x=Forest_Type , y=Soil_pH, ymin=upper, ymax=lower), width=0.2, size=1, col=c("red","darkgreen","blue"), shape=22) + labs(title="Soil pH by Forest Type")

## Warning in geom_pointrange(data = d, mapping = aes(x = Forest_Type, y = Soil_pH, : Ignoring unknown
## parameters: `width`

#violin plot (alternative to the pH rangeplot)
pH_violin <- ggplot(pH,aes(x=Forest_Type,y=pH))+geom_violin(trim=FALSE) + labs(title="Soil pH by Forest Type")
pH_violin + stat_summary(fun.data=mean_sdl, geom="pointrange",size=0.5,color=c("red","darkgreen","blue"))

##############################Organic Matter from LOI####################################

#creating a new column to calculate percentage of organic matter
ignition <- ign %>% mutate(Per_OM=Loss/Soil_Initial*100)

all_dat <- merge (data.frame(all_data, row.names=NULL), data.frame(ignition, row.names=NULL), by=0, all=TRUE) [-1]

## Warning in merge.data.frame(data.frame(all_data, row.names = NULL), data.frame(ignition, : column names
## 'Forest_Type.x', 'Forest_Type.y' are duplicated in the result

ggplot(ignition,aes(as.factor(Forest_Type),Per_OM))+geom_boxplot(col=c("red","darkgreen","blue"))+labs(x='Forest Type',y='Percentage of Organic Matter',title="Soil Organic Matter Content, by Forest Type")

############################Data table##################################
library(tidyverse)
library(RColorBrewer)
install.packages('gtExtras')

## Error in install.packages : Updating loaded packages

library(gtExtras)

plot <- all_dat %>% 
  rename(`Forest Type` = Forest_Type.x) %>%
  group_by(`Forest Type`) %>%
  reframe(`Soil pH` = range(pH),
            `Percent Organic Matter (Leaf Litter)` = mean(Per_OM),
            `Average CN Ratio (Leaf Litter)` = round(mean(C_N_Ratio),digits=2)) %>%
  gt() %>%
  tab_header(title = 'Soil and Leaf Litter Characteristics in Inwood Hill Park, by Forest Type') %>%
  cols_align(align="left")

#Mapping disturbances

#read in shapefile of parks trails across NYC (I received this data from one of my lab peers - I don't have the original data source) 
park_trails <- st_read("Parks_Trails_20250403/geo_export_a84f1b35-c123-45cc-8677-2632acfdbb06.shp")

## Reading layer `geo_export_a84f1b35-c123-45cc-8677-2632acfdbb06' from data source 
##   `/Users/jiadavalenza/Desktop/spring 2025/GTECH 385/Final Project/Parks_Trails_20250403/geo_export_a84f1b35-c123-45cc-8677-2632acfdbb06.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 6479 features and 11 fields
## Geometry type: MULTILINESTRING
## Dimension:     XY
## Bounding box:  xmin: -74.25515 ymin: 40.49653 xmax: -73.73532 ymax: 40.91046
## Geodetic CRS:  WGS 84

str(park_trails)

## Classes 'sf' and 'data.frame':   6479 obs. of  12 variables:
##  $ park_name : chr  "Spring Creek Park" "Spring Creek Park" "Spring Creek Park" "Spring Creek Park" ...
##  $ width_ft  : chr  "Over 8 feet" "Over 8 feet" "4 feet to less than 6 feet" "Over 8 feet" ...
##  $ class     : chr  "Class V : Fully Developed" "Class V : Fully Developed" "Class V : Fully Developed" "Class V : Fully Developed" ...
##  $ surface   : chr  "Paved" "Paved" "Paved" "Paved" ...
##  $ gen_topog : chr  "Level" "Level" "Level" "Level" ...
##  $ difficulty: chr  "1: flat and smooth" "1: flat and smooth" "1: flat and smooth" "1: flat and smooth" ...
##  $ date_date_: Date, format: "2022-03-31" "2022-03-31" "2022-03-31" ...
##  $ time_date_: chr  "13:15:39.000" "13:12:15.000" "13:04:19.000" "13:01:20.000" ...
##  $ trail_name: chr  "Blue Trail" "Unnamed Official Trail" "Unnamed Official Trail" "Blue Trail" ...
##  $ parkid    : chr  "B371" "B371" "B371" "B371" ...
##  $ trailmarke: chr  "No" "No" "No" "No" ...
##  $ geometry  :sfc_MULTILINESTRING of length 6479; first list element: List of 1
##   ..$ : num [1:8, 1:2] -73.9 -73.9 -73.9 -73.9 -73.9 ...
##   ..- attr(*, "class")= chr [1:3] "XY" "MULTILINESTRING" "sfg"
##  - attr(*, "sf_column")= chr "geometry"
##  - attr(*, "agr")= Factor w/ 3 levels "constant","aggregate",..: NA NA NA NA NA NA NA NA NA NA ...
##   ..- attr(*, "names")= chr [1:11] "park_name" "width_ft" "class" "surface" ...

st_crs(park_trails)

## Coordinate Reference System:
##   User input: WGS 84 
##   wkt:
## GEOGCRS["WGS 84",
##     ENSEMBLE["World Geodetic System 1984 ensemble",
##         MEMBER["World Geodetic System 1984 (Transit)"],
##         MEMBER["World Geodetic System 1984 (G730)"],
##         MEMBER["World Geodetic System 1984 (G873)"],
##         MEMBER["World Geodetic System 1984 (G1150)"],
##         MEMBER["World Geodetic System 1984 (G1674)"],
##         MEMBER["World Geodetic System 1984 (G1762)"],
##         MEMBER["World Geodetic System 1984 (G2139)"],
##         MEMBER["World Geodetic System 1984 (G2296)"],
##         ELLIPSOID["WGS 84",6378137,298.257223563,
##             LENGTHUNIT["metre",1]],
##         ENSEMBLEACCURACY[2.0]],
##     PRIMEM["Greenwich",0,
##         ANGLEUNIT["degree",0.0174532925199433]],
##     CS[ellipsoidal,2],
##         AXIS["geodetic latitude (Lat)",north,
##             ORDER[1],
##             ANGLEUNIT["degree",0.0174532925199433]],
##         AXIS["geodetic longitude (Lon)",east,
##             ORDER[2],
##             ANGLEUNIT["degree",0.0174532925199433]],
##     USAGE[
##         SCOPE["Horizontal component of 3D system."],
##         AREA["World."],
##         BBOX[-90,-180,90,180]],
##     ID["EPSG",4326]]

plot(st_geometry(park_trails), main='Inwood Hill Official NYC Parks Trails')

# While I have a shapefile for the informal paths, I noticed point that no CRS was assigned to my lines. I originally just used st_set_crs() to solve the problem, but I later saw that it caused errors as the CRS was still NA (it could not be transformed.) Thus I uploaded my shapefiles into Google Earth Pro, and am reading in the KML file with an assigned CRS below. 

dist_lines <- st_read("disturbancelines.kml")

## Reading layer `Line_gen' from data source 
##   `/Users/jiadavalenza/Desktop/spring 2025/GTECH 385/Final Project/disturbancelines.kml' 
##   using driver `KML'
## Simple feature collection with 5 features and 2 fields
## Geometry type: LINESTRING
## Dimension:     XYZ
## Bounding box:  xmin: -73.92527 ymin: 40.8691 xmax: -73.923 ymax: 40.87059
## z_range:       zmin: 28.75419 zmax: 49.29794
## Geodetic CRS:  WGS 84

st_crs(dist_lines)

## Coordinate Reference System:
##   User input: WGS 84 
##   wkt:
## GEOGCRS["WGS 84",
##     DATUM["World Geodetic System 1984",
##         ELLIPSOID["WGS 84",6378137,298.257223563,
##             LENGTHUNIT["metre",1]]],
##     PRIMEM["Greenwich",0,
##         ANGLEUNIT["degree",0.0174532925199433]],
##     CS[ellipsoidal,2],
##         AXIS["geodetic latitude (Lat)",north,
##             ORDER[1],
##             ANGLEUNIT["degree",0.0174532925199433]],
##         AXIS["geodetic longitude (Lon)",east,
##             ORDER[2],
##             ANGLEUNIT["degree",0.0174532925199433]],
##     ID["EPSG",4326]]

#plotting the park boundaries from the polygon I created in Google Earth Pro

st_layers("InwoodHillPark.kml")

## Driver: KML 
## Available layers:

#reading in the kml file
IHP <- st_read("InwoodHillPark.kml")

## Reading layer `Inwood Hill Park.kml' from data source 
##   `/Users/jiadavalenza/Desktop/spring 2025/GTECH 385/Final Project/InwoodHillPark.kml' 
##   using driver `KML'
## Simple feature collection with 1 feature and 2 fields
## Geometry type: POLYGON
## Dimension:     XYZ
## Bounding box:  xmin: -73.93215 ymin: 40.86678 xmax: -73.91845 ymax: 40.87758
## z_range:       zmin: 0 zmax: 0
## Geodetic CRS:  WGS 84

#plotting the data 

ggplot() +
  labs(x="Longitude (WGS84)", y="Latitude",
       title="Inwood Hill Park") + 
  geom_sf(data=IHP, col="blue", lwd=0.4, pch=21) +
  theme_bw()

#I included the line below, as one of my data sets was created on a flat plane, while the other on a curved plane, which kept causing an error when I tried to use st_intersection(). This avoids the problem. 
sf::sf_use_s2(FALSE)
          
# Here I am clipping the park trails to the boundaries of Inwood Hill Park, so I only have the trails I am interested in.
IHP_trails <- st_intersection(park_trails, IHP)

## although coordinates are longitude/latitude, st_intersection assumes that they are planar

## Warning: attribute variables are assumed to be spatially constant throughout all geometries

#Finally, plotting the park boundaries, disturbance lines, and relevant park trails all together
ggplot() + 
  geom_sf(data = IHP, col="blue", lwd=0.4, pch=21) +
  theme_bw() +
  geom_sf(data = IHP_trails) + 
  geom_sf(data=dist_lines, col="magenta", lwd=1,pch=21)+
  ggtitle("Inwood Hill Park Official & Unofficial Trails")

#Plotting the above made me realize that 2 of the informal paths measured in field work are actually official park paths. I have thus removed those paths below, to only plot the remaining informal paths and the official park bounds. 

finaldist_lines <- st_read("final_lines.kml")

## Reading layer `Line_gen' from data source 
##   `/Users/jiadavalenza/Desktop/spring 2025/GTECH 385/Final Project/final_lines.kml' using driver `KML'
## Simple feature collection with 3 features and 2 fields
## Geometry type: LINESTRING
## Dimension:     XYZ
## Bounding box:  xmin: -73.92492 ymin: 40.8691 xmax: -73.923 ymax: 40.87059
## z_range:       zmin: 28.75419 zmax: 42.14467
## Geodetic CRS:  WGS 84

ggplot() + 
  geom_sf(data = IHP, col="blue", lwd=0.4, pch=21) +
  theme_bw() +
  geom_sf(data = IHP_trails) + 
  geom_sf(data=finaldist_lines, col="magenta", lwd=1,pch=21)+
  ggtitle("Inwood Hill Park Official & Unofficial Trails")

#Finally, I am plotting the sample plot areas for the three forest types, to visually model what the final plot will look like with the real data. Since these are samples, I will not be conducting analysis on these plots. 

BL_plot <- st_read("BlackLocust_Sample.kml")

## Reading layer `BlackLocust_Sample.kml' from data source 
##   `/Users/jiadavalenza/Desktop/spring 2025/GTECH 385/Final Project/BlackLocust_Sample.kml' 
##   using driver `KML'
## Simple feature collection with 1 feature and 2 fields
## Geometry type: POLYGON
## Dimension:     XYZ
## Bounding box:  xmin: -73.92519 ymin: 40.86905 xmax: -73.92415 ymax: 40.87003
## z_range:       zmin: 0 zmax: 0
## Geodetic CRS:  WGS 84

NF_plot <- st_read("NativeForest_Sample.kml")

## Reading layer `NativeForest_Sample.kml' from data source 
##   `/Users/jiadavalenza/Desktop/spring 2025/GTECH 385/Final Project/NativeForest_Sample.kml' 
##   using driver `KML'
## Simple feature collection with 1 feature and 2 fields
## Geometry type: POLYGON
## Dimension:     XYZ
## Bounding box:  xmin: -73.9239 ymin: 40.86999 xmax: -73.92245 ymax: 40.87081
## z_range:       zmin: 0 zmax: 0
## Geodetic CRS:  WGS 84

NM_plot <- st_read("NorwayMaple_Sample.kml")

## Reading layer `NorwayMaple_Sample.kml' from data source 
##   `/Users/jiadavalenza/Desktop/spring 2025/GTECH 385/Final Project/NorwayMaple_Sample.kml' 
##   using driver `KML'
## Simple feature collection with 1 feature and 2 fields
## Geometry type: POLYGON
## Dimension:     XYZ
## Bounding box:  xmin: -73.92443 ymin: 40.8693 xmax: -73.92298 ymax: 40.87027
## z_range:       zmin: 0 zmax: 0
## Geodetic CRS:  WGS 84

# Final map! 

ggplot() + 
  geom_sf(data = IHP, col="gray40", lwd=0.4, pch=21) +
  theme_bw() +
  geom_sf(data = IHP_trails,col="gray40") + 
  geom_sf(data=BL_plot, aes(fill="Black Locust"),lwd=0.4, pch=21)+
  geom_sf(data=NM_plot, aes(fill="Norway Maple"),lwd=0.4, pch=21)+
  geom_sf(data=NF_plot, aes(fill="Native Forest"),lwd=0.4, pch=21)+
  geom_sf(data=finaldist_lines, aes(fill="Informal Paths"),lwd=1,pch=21)+ 
  scale_fill_manual(values = c("Black Locust" = "red", "Norway Maple"= "blue","Native Forest"="darkgreen","Informal Paths"="yellow")) +  
  labs(title = "Informal Paths and Study Plots in Inwood Hill Park",
       fill = "Forest Type & Informal Paths")

knitting

install.packages("knitr")

## Error in install.packages : Updating loaded packages

library(knitr)
install.packages("rmarkdown")

## Error in install.packages : Updating loaded packages

library(rmarkdown)

options(knitr.duplicate.label = "allow")

#knitr::opts_chunk$set(echo = TRUE)

#rmarkdown::render("Valenza_Final.rmd")

rmarkdown::render("Valenza_Final.rmd")

Final Project - Earthworms Capstone Figures

Jiada Valenza

Week 1: Reading in the data & data cleanup

knitting