Final Project - Earthworms Capstone Figures

##Downloads

require(tidyverse); require(magrittr);
require(sp);require(sf);
require(classInt);require(RColorBrewer);
require(ggplot2);require(ggmap);require(leaflet);
require(ggrepel);
library(ggpubr); library(devtools);
require(htmlwidgets); require(mapview); require(gt)

setwd("~/Desktop/spring 2025/GTECH 385/Final Project")

Week 1: Reading in the data & data cleanup

#reading in the ignition data (tells us the organic matter content of the soils)

ig_loss <- readr::read_csv("Ignition_Loss.csv", lazy = FALSE)

## Rows: 108 Columns: 12
## ── Column specification ─────────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): Sample_ID, Plot Type, Depth, Date
## dbl (7): Crucible_No, Crucible_Mass, Crucible_Soil, Soil_Initial, Final_Crucible_...
## lgl (1): Org_Mat
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

str(ig_loss)

## spc_tbl_ [108 × 12] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ Sample_ID          : chr [1:108] "BL1" "BL1" "BL2" "BL2" ...
##  $ Plot Type          : chr [1:108] NA NA NA NA ...
##  $ Depth              : chr [1:108] "A" "B" "A" "B" ...
##  $ Crucible_No        : num [1:108] 1 2 3 4 5 6 7 8 9 10 ...
##  $ Crucible_Mass      : num [1:108] 42.1 47.2 38.2 39 38.3 ...
##  $ Crucible_Soil      : num [1:108] 44.6 50.1 40.5 41.6 40.4 ...
##  $ Soil_Initial       : num [1:108] 2.53 2.89 2.31 2.63 2.12 ...
##  $ Final_Crucible_Soil: num [1:108] 44.2 49.9 40.2 41.4 40.1 ...
##  $ Final_Soil         : num [1:108] 2.18 2.67 1.97 2.37 1.84 ...
##  $ Loss               : num [1:108] 0.348 0.218 0.339 0.254 0.275 0.211 0.379 0.428 0.287 0.202 ...
##  $ Org_Mat            : logi [1:108] NA NA NA NA NA NA ...
##  $ Date               : chr [1:108] "2/10/25" "2/10/25" "2/10/25" "2/10/25" ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   Sample_ID = col_character(),
##   ..   `Plot Type` = col_character(),
##   ..   Depth = col_character(),
##   ..   Crucible_No = col_double(),
##   ..   Crucible_Mass = col_double(),
##   ..   Crucible_Soil = col_double(),
##   ..   Soil_Initial = col_double(),
##   ..   Final_Crucible_Soil = col_double(),
##   ..   Final_Soil = col_double(),
##   ..   Loss = col_double(),
##   ..   Org_Mat = col_logical(),
##   ..   Date = col_character()
##   .. )
##  - attr(*, "problems")=<externalptr>

#reading in the C:N data (tells us about carbon versus nitrogen content of the soils)

cn_ratio <- readr::read_csv("Leaf_LitterCN.csv", lazy = FALSE)

## Rows: 54 Columns: 8
## ── Column specification ─────────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): Sorted, Name, Sample Month
## dbl (5): Run_No, Weight, Perc_N, Perc_C, C_N_Ratio
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

str(cn_ratio)

## spc_tbl_ [54 × 8] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ Sorted      : chr [1:54] "Y" "Y" "N" "N" ...
##  $ Run_No      : num [1:54] 1 1 1 1 1 1 1 1 1 1 ...
##  $ Weight      : num [1:54] 250 249 252 251 251 ...
##  $ Name        : chr [1:54] "BL1" "BL1" "BL2" "BL2" ...
##  $ Sample Month: chr [1:54] "June" "June" "June" "June" ...
##  $ Perc_N      : num [1:54] 1.26 1.25 1.8 1.76 1.58 ...
##  $ Perc_C      : num [1:54] 45.5 45.6 39.8 39.1 42.3 ...
##  $ C_N_Ratio   : num [1:54] 36.2 36.6 22.1 22.2 26.8 ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   Sorted = col_character(),
##   ..   Run_No = col_double(),
##   ..   Weight = col_double(),
##   ..   Name = col_character(),
##   ..   `Sample Month` = col_character(),
##   ..   Perc_N = col_double(),
##   ..   Perc_C = col_double(),
##   ..   C_N_Ratio = col_double()
##   .. )
##  - attr(*, "problems")=<externalptr>

#reading in the soil pH data 

soil_pH <- readr::read_csv("SoilpH.csv", lazy = FALSE)

## Rows: 36 Columns: 9
## ── Column specification ─────────────────────────────────────────────────────────────
## Delimiter: ","
## chr (7): SampleID, Type, Plot, Depth, Forest_Type, Tube, Date
## dbl (2): Soil_Mass, pH
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

str(soil_pH)

## spc_tbl_ [36 × 9] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ SampleID   : chr [1:36] "B1_1" "B1_1" "B1_2" "B1_2" ...
##  $ Type       : chr [1:36] "Field" "Field" "Field" "Field" ...
##  $ Plot       : chr [1:36] "BL1" "BL1" "BL1" "BL1" ...
##  $ Depth      : chr [1:36] "0 to 5" "0 to 5" "5 to 10" "5 to 10" ...
##  $ Forest_Type: chr [1:36] "Black Locust" "Black Locust" "Black Locust" "Black Locust" ...
##  $ Tube       : chr [1:36] "4-1" "4-2" "11-1" "11-2" ...
##  $ Soil_Mass  : num [1:36] 15 15 15 15 15 ...
##  $ Date       : chr [1:36] "10/11/24" "10/11/24" "10/11/24" "10/11/24" ...
##  $ pH         : num [1:36] 5.8 5.39 5.37 5.21 5.73 5.68 8.2 8.43 6.03 6.33 ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   SampleID = col_character(),
##   ..   Type = col_character(),
##   ..   Plot = col_character(),
##   ..   Depth = col_character(),
##   ..   Forest_Type = col_character(),
##   ..   Tube = col_character(),
##   ..   Soil_Mass = col_double(),
##   ..   Date = col_character(),
##   ..   pH = col_double()
##   .. )
##  - attr(*, "problems")=<externalptr>

#creating individual dataframes for each forest stand (3 total, Black Locust, Native Forest, and Norway Maple) with consistent naming conventions. All new data frames below will have a column labeled "Forest_Type" that will use the same label for the forest stand in question.

library(magrittr)
library(dplyr)

#######################Black Locust data######################## 

#pH
BL_pH <- soil_pH[which(soil_pH$Forest_Type=="Black Locust"),]

#CN 
BL_lab_cn <- cn_ratio %>% select(Name) %>% mutate(Forest_Type=case_when(str_detect(Name,'^B')==T ~ 'Black Locust',TRUE ~ "N")) 
BL_lab_cn <- cbind(BL_lab_cn,cn_ratio)
BL_CN <- BL_lab_cn[which(BL_lab_cn$Forest_Type=="Black Locust"),]

#ignition loss (using the preliminary data right now, some more observations need to be added)
BL_lab_ig <- ig_loss %>% select(Sample_ID) %>% mutate(Forest_Type=case_when(str_detect(Sample_ID,'^BL')==T ~ 'Black Locust',TRUE ~ "N"))
BL_lab_ig <- cbind(BL_lab_ig,ig_loss)
BL_ig <- BL_lab_ig[which(BL_lab_ig$Forest_Type=="Black Locust"),]

# All BL data combined. I need to use merge because of different row length in the CN data), so I begin by using cbind for the ig and pH data, which have the same number of rows. This is because the merge function only takes two data frames at a time (I got an error when I tried to do all three at once.)

BL_pHandig <-cbind(BL_pH,BL_ig)
BL_data <- merge(data.frame(BL_pHandig, row.names=NULL), data.frame(BL_CN, row.names=NULL), by=0, all=TRUE) [-1] 

########################Native Forest pH data############################ 

#pH
NF_pH <- soil_pH[which(soil_pH$Forest_Type=="Native Forest"),]

#CN 
NF_lab_cn <- cn_ratio %>% select(Name) %>% mutate(Forest_Type=case_when(str_detect(Name,'^NF')==T ~ 'Native Forest',TRUE ~ "N")) 
NF_lab_cn <- cbind(NF_lab_cn,cn_ratio)
NF_CN <- NF_lab_cn[which(NF_lab_cn$Forest_Type=="Native Forest"),]

#ignition loss (using the preliminary data right now, some more observations need to be added)
NF_lab_ig <- ig_loss %>% select(Sample_ID) %>% mutate(Forest_Type=case_when(str_detect(Sample_ID,'^NF')==T ~ 'Native Forest',TRUE ~ "N"))
NF_lab_ig <- cbind(NF_lab_ig,ig_loss)
NF_ig <- NF_lab_ig[which(NF_lab_ig$Forest_Type=="Native Forest"),]

# All NF data combined. Here I can just use cbind because the three data frames have the same number of rows.
NF_data <- cbind(NF_CN,NF_ig,NF_pH)

#########################Norway Maple pH data###########################

#pH
NM_pH <- soil_pH[which(soil_pH$Forest_Type=="Norway Maple"),]

#CN 
NM_lab_cn <- cn_ratio %>% select(Name) %>% mutate(Forest_Type=case_when(str_detect(Name,'^NM')==T ~ 'Norway Maple',TRUE ~ "N")) 

NM_lab_cn2 <- cn_ratio %>% select(Name) %>% mutate(Forest_Type=case_when(str_detect(Name,'^No')==T ~ 'Norway Maple',TRUE ~ "N")) 

NM_lab_cn <- bind_rows(NM_lab_cn,NM_lab_cn2)

cn2 <- cn_ratio
cn2 <- bind_rows(cn2,cn_ratio)
NM_lab_cn <- cbind(NM_lab_cn,cn2)
NM_CN <- NM_lab_cn[which(NM_lab_cn$Forest_Type=="Norway Maple"),]

#ignition loss (using the preliminary data right now, some more observations need to be added)
NM_lab_ig <- ig_loss %>% select(Sample_ID) %>% mutate(Forest_Type=case_when(str_detect(Sample_ID,'^NM')==T ~ 'Norway Maple',TRUE ~ "N"))
NM_lab_ig <- cbind(NM_lab_ig,ig_loss)
NM_ig <- NM_lab_ig[which(NM_lab_ig$Forest_Type=="Norway Maple"),]

# All NM data combined. Here I have to use merge again because each data frame has a different number of rows. 
NM_dat1 <- merge(data.frame(NM_pH, row.names=NULL), data.frame(NM_CN, row.names=NULL), by=0, all=TRUE) [-1] 
NM_data <- merge(data.frame(NM_dat1, row.names=NULL), data.frame(NM_ig, row.names=NULL), by=0, all=TRUE) [-1] 

# Finally, I will create a data frame per category (i.e. all corrected pH data in one df, all CN together, etc.)

pH <- bind_rows(BL_pH,NF_pH,NM_pH)
CN <- bind_rows(BL_CN,NF_CN,NM_CN)

## New names:
## New names:
## New names:
## • `Name` -> `Name...1`
## • `Name` -> `Name...6`

ign <- bind_rows(BL_ig,NF_ig,NM_ig)

## New names:
## New names:
## New names:
## • `Sample_ID` -> `Sample_ID...1`
## • `Sample_ID` -> `Sample_ID...3`

#creating a data frame with all of the data frames combined. For this, I will have to use the merge function twice.

all_ver1 <- merge(data.frame(pH, row.names=NULL), data.frame(CN, row.names=NULL), by=0, all=TRUE) [-1] 
all_data <- merge (data.frame(all_ver1, row.names=NULL), data.frame(ign, row.names=NULL), by=0, all=TRUE) [-1] 

#Now, I have data frames containing data for each forest stand, all with a consistent label for each data type, making them easier to work with in my analysis for Week 2.

##Week 2: Boxplots, Range plots, and stat analyses

#########################CN boxplots (CN Ratio by dominant Forest Stand)############################

ggplot(CN,aes(as.factor(Forest_Type),C_N_Ratio))+geom_boxplot(col=c("red","darkgreen","blue"))+labs(x='Forest Type',y='C:N Ratio',title="Carbon Nitrogen (C:N) Ratios in Leaf Litter, by Forest Type")

##EDITS: assign colors to each Forest Type in the box plot, to be used in the range plot below

####################################Horizontal range plot for pH######################################### 
library(ggplot2)

#now I am finding the lowest and highest values for each forest stand pH, so I may create my range plot. I was having trouble doing it in a similar way to how I created the boxplot, I keep getting an error message. 

lowest_BL <- which.min(BL_pH$pH)
print(lowest_BL)

## [1] 4

#5.21 is the lowest pH in BL
highest_BL <- which.max(BL_pH$pH)
print(highest_BL)

## [1] 8

#8.43 is the highest pH in BL 
mean(BL_pH$pH)

## [1] 6.479167

#6.48 is the mean pH 

lowest_NF  <- which.min(NF_pH$pH)
print(lowest_NF)

## [1] 8

#4.04 is the lowest pH in NF
highest_NF <- which.max(NF_pH$pH)
print(highest_NF)

## [1] 11

#5.57 is the highest pH in NF
mean(NF_pH$pH)

## [1] 4.915

#4.92 is the mean pH

lowest_NM  <- which.min(NM_pH$pH)
print(lowest_NM)

## [1] 8

#4.46 is the lowest pH in NM
highest_NM <- which.max(NM_pH$pH)
print(highest_NM)

## [1] 11

#5.95 is the highest pH in NM
mean(NM_pH$pH)

## [1] 5.156667

#5.16 is the mean pH

install.packages(ggplot2)

## Error in install.packages : object 'ggplot2' not found

library(ggplot2)

par(mar = c(1, 19, 1, 19))
d=data.frame(Forest_Type=c("Black Locust","Native Forest","Norway Maple"), Soil_pH=c(6.48,4.92,5.16), lower=c(5.21,4.04,4.46), upper=c(8.43,5.57,5.95))
ggplot() + 
geom_pointrange(data=d, mapping=aes(x=Forest_Type , y=Soil_pH, ymin=upper, ymax=lower), width=0.2, size=1, col=c("red","darkgreen","blue"), shape=22) + labs(title="Soil pH by Forest Type")

## Warning in geom_pointrange(data = d, mapping = aes(x = Forest_Type, y = Soil_pH, :
## Ignoring unknown parameters: `width`

##############################Organic Matter from LOI####################################

#creating a new column to calculate percentage of organic matter
ignition <- ign %>% mutate(Per_OM=Loss/Soil_Initial*100)

all_dat <- merge (data.frame(all_data, row.names=NULL), data.frame(ignition, row.names=NULL), by=0, all=TRUE) [-1]

## Warning in merge.data.frame(data.frame(all_data, row.names = NULL),
## data.frame(ignition, : column names 'Forest_Type.x', 'Forest_Type.y' are duplicated
## in the result

ggplot(ignition,aes(as.factor(Forest_Type),Per_OM))+geom_boxplot(col=c("red","darkgreen","blue"))+labs(x='Forest Type',y='Percentage of Organic Matter',title="Soil Organic Matter Content, by Forest Type")

############################Data table##################################
library(tidyverse)
library(RColorBrewer)
install.packages('gtExtras')

## Error in install.packages : Updating loaded packages

library(gtExtras)

plot <- all_dat %>% 
  rename(`Forest Type` = Forest_Type.x) %>%
  group_by(`Forest Type`) %>%
  reframe(`Soil pH` = range(pH),
            `Percent Organic Matter (Leaf Litter)` = mean(Per_OM),
            `Average CN Ratio (Leaf Litter)` = round(mean(C_N_Ratio),digits=2)) %>%
  gt() %>%
  tab_header(title = 'Soil and Leaf Litter Characteristics in Inwood Hill Park, by Forest Type') %>%
  cols_align(align="left")

knitting

install.packages("knitr")

## Error in install.packages : Updating loaded packages

library(knitr)
install.packages("rmarkdown")

## Error in install.packages : Updating loaded packages

library(rmarkdown)

options(knitr.duplicate.label = "allow")

#knitr::opts_chunk$set(echo = TRUE)

#rmarkdown::render("Valenza_Final.rmd")

rmarkdown::render("Valenza_Final.rmd")

Final Project - Earthworms Capstone Figures

Jiada Valenza

Week 1: Reading in the data & data cleanup

knitting