##Downloads
require(tidyverse); require(magrittr);
require(sp);require(sf);
require(classInt);require(RColorBrewer);
require(ggplot2);require(ggmap);require(leaflet);
require(ggrepel);
library(ggpubr); library(devtools);
require(htmlwidgets); require(mapview); require(gt)
setwd("~/Desktop/spring 2025/GTECH 385/Final Project")
#reading in the ignition data (tells us the organic matter content of the soils)
ig_loss <- readr::read_csv("Ignition_Loss.csv", lazy = FALSE)
## Rows: 108 Columns: 12
## ── Column specification ─────────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): Sample_ID, Plot Type, Depth, Date
## dbl (7): Crucible_No, Crucible_Mass, Crucible_Soil, Soil_Initial, Final_Crucible_...
## lgl (1): Org_Mat
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
str(ig_loss)
## spc_tbl_ [108 × 12] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ Sample_ID : chr [1:108] "BL1" "BL1" "BL2" "BL2" ...
## $ Plot Type : chr [1:108] NA NA NA NA ...
## $ Depth : chr [1:108] "A" "B" "A" "B" ...
## $ Crucible_No : num [1:108] 1 2 3 4 5 6 7 8 9 10 ...
## $ Crucible_Mass : num [1:108] 42.1 47.2 38.2 39 38.3 ...
## $ Crucible_Soil : num [1:108] 44.6 50.1 40.5 41.6 40.4 ...
## $ Soil_Initial : num [1:108] 2.53 2.89 2.31 2.63 2.12 ...
## $ Final_Crucible_Soil: num [1:108] 44.2 49.9 40.2 41.4 40.1 ...
## $ Final_Soil : num [1:108] 2.18 2.67 1.97 2.37 1.84 ...
## $ Loss : num [1:108] 0.348 0.218 0.339 0.254 0.275 0.211 0.379 0.428 0.287 0.202 ...
## $ Org_Mat : logi [1:108] NA NA NA NA NA NA ...
## $ Date : chr [1:108] "2/10/25" "2/10/25" "2/10/25" "2/10/25" ...
## - attr(*, "spec")=
## .. cols(
## .. Sample_ID = col_character(),
## .. `Plot Type` = col_character(),
## .. Depth = col_character(),
## .. Crucible_No = col_double(),
## .. Crucible_Mass = col_double(),
## .. Crucible_Soil = col_double(),
## .. Soil_Initial = col_double(),
## .. Final_Crucible_Soil = col_double(),
## .. Final_Soil = col_double(),
## .. Loss = col_double(),
## .. Org_Mat = col_logical(),
## .. Date = col_character()
## .. )
## - attr(*, "problems")=<externalptr>
#reading in the C:N data (tells us about carbon versus nitrogen content of the soils)
cn_ratio <- readr::read_csv("Leaf_LitterCN.csv", lazy = FALSE)
## Rows: 54 Columns: 8
## ── Column specification ─────────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): Sorted, Name, Sample Month
## dbl (5): Run_No, Weight, Perc_N, Perc_C, C_N_Ratio
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
str(cn_ratio)
## spc_tbl_ [54 × 8] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ Sorted : chr [1:54] "Y" "Y" "N" "N" ...
## $ Run_No : num [1:54] 1 1 1 1 1 1 1 1 1 1 ...
## $ Weight : num [1:54] 250 249 252 251 251 ...
## $ Name : chr [1:54] "BL1" "BL1" "BL2" "BL2" ...
## $ Sample Month: chr [1:54] "June" "June" "June" "June" ...
## $ Perc_N : num [1:54] 1.26 1.25 1.8 1.76 1.58 ...
## $ Perc_C : num [1:54] 45.5 45.6 39.8 39.1 42.3 ...
## $ C_N_Ratio : num [1:54] 36.2 36.6 22.1 22.2 26.8 ...
## - attr(*, "spec")=
## .. cols(
## .. Sorted = col_character(),
## .. Run_No = col_double(),
## .. Weight = col_double(),
## .. Name = col_character(),
## .. `Sample Month` = col_character(),
## .. Perc_N = col_double(),
## .. Perc_C = col_double(),
## .. C_N_Ratio = col_double()
## .. )
## - attr(*, "problems")=<externalptr>
#reading in the soil pH data
soil_pH <- readr::read_csv("SoilpH.csv", lazy = FALSE)
## Rows: 36 Columns: 9
## ── Column specification ─────────────────────────────────────────────────────────────
## Delimiter: ","
## chr (7): SampleID, Type, Plot, Depth, Forest_Type, Tube, Date
## dbl (2): Soil_Mass, pH
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
str(soil_pH)
## spc_tbl_ [36 × 9] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ SampleID : chr [1:36] "B1_1" "B1_1" "B1_2" "B1_2" ...
## $ Type : chr [1:36] "Field" "Field" "Field" "Field" ...
## $ Plot : chr [1:36] "BL1" "BL1" "BL1" "BL1" ...
## $ Depth : chr [1:36] "0 to 5" "0 to 5" "5 to 10" "5 to 10" ...
## $ Forest_Type: chr [1:36] "Black Locust" "Black Locust" "Black Locust" "Black Locust" ...
## $ Tube : chr [1:36] "4-1" "4-2" "11-1" "11-2" ...
## $ Soil_Mass : num [1:36] 15 15 15 15 15 ...
## $ Date : chr [1:36] "10/11/24" "10/11/24" "10/11/24" "10/11/24" ...
## $ pH : num [1:36] 5.8 5.39 5.37 5.21 5.73 5.68 8.2 8.43 6.03 6.33 ...
## - attr(*, "spec")=
## .. cols(
## .. SampleID = col_character(),
## .. Type = col_character(),
## .. Plot = col_character(),
## .. Depth = col_character(),
## .. Forest_Type = col_character(),
## .. Tube = col_character(),
## .. Soil_Mass = col_double(),
## .. Date = col_character(),
## .. pH = col_double()
## .. )
## - attr(*, "problems")=<externalptr>
#creating individual dataframes for each forest stand (3 total, Black Locust, Native Forest, and Norway Maple) with consistent naming conventions. All new data frames below will have a column labeled "Forest_Type" that will use the same label for the forest stand in question.
library(magrittr)
library(dplyr)
#######################Black Locust data########################
#pH
BL_pH <- soil_pH[which(soil_pH$Forest_Type=="Black Locust"),]
#CN
BL_lab_cn <- cn_ratio %>% select(Name) %>% mutate(Forest_Type=case_when(str_detect(Name,'^B')==T ~ 'Black Locust',TRUE ~ "N"))
BL_lab_cn <- cbind(BL_lab_cn,cn_ratio)
BL_CN <- BL_lab_cn[which(BL_lab_cn$Forest_Type=="Black Locust"),]
#ignition loss (using the preliminary data right now, some more observations need to be added)
BL_lab_ig <- ig_loss %>% select(Sample_ID) %>% mutate(Forest_Type=case_when(str_detect(Sample_ID,'^BL')==T ~ 'Black Locust',TRUE ~ "N"))
BL_lab_ig <- cbind(BL_lab_ig,ig_loss)
BL_ig <- BL_lab_ig[which(BL_lab_ig$Forest_Type=="Black Locust"),]
# All BL data combined. I need to use merge because of different row length in the CN data), so I begin by using cbind for the ig and pH data, which have the same number of rows. This is because the merge function only takes two data frames at a time (I got an error when I tried to do all three at once.)
BL_pHandig <-cbind(BL_pH,BL_ig)
BL_data <- merge(data.frame(BL_pHandig, row.names=NULL), data.frame(BL_CN, row.names=NULL), by=0, all=TRUE) [-1]
########################Native Forest pH data############################
#pH
NF_pH <- soil_pH[which(soil_pH$Forest_Type=="Native Forest"),]
#CN
NF_lab_cn <- cn_ratio %>% select(Name) %>% mutate(Forest_Type=case_when(str_detect(Name,'^NF')==T ~ 'Native Forest',TRUE ~ "N"))
NF_lab_cn <- cbind(NF_lab_cn,cn_ratio)
NF_CN <- NF_lab_cn[which(NF_lab_cn$Forest_Type=="Native Forest"),]
#ignition loss (using the preliminary data right now, some more observations need to be added)
NF_lab_ig <- ig_loss %>% select(Sample_ID) %>% mutate(Forest_Type=case_when(str_detect(Sample_ID,'^NF')==T ~ 'Native Forest',TRUE ~ "N"))
NF_lab_ig <- cbind(NF_lab_ig,ig_loss)
NF_ig <- NF_lab_ig[which(NF_lab_ig$Forest_Type=="Native Forest"),]
# All NF data combined. Here I can just use cbind because the three data frames have the same number of rows.
NF_data <- cbind(NF_CN,NF_ig,NF_pH)
#########################Norway Maple pH data###########################
#pH
NM_pH <- soil_pH[which(soil_pH$Forest_Type=="Norway Maple"),]
#CN
NM_lab_cn <- cn_ratio %>% select(Name) %>% mutate(Forest_Type=case_when(str_detect(Name,'^NM')==T ~ 'Norway Maple',TRUE ~ "N"))
NM_lab_cn2 <- cn_ratio %>% select(Name) %>% mutate(Forest_Type=case_when(str_detect(Name,'^No')==T ~ 'Norway Maple',TRUE ~ "N"))
NM_lab_cn <- bind_rows(NM_lab_cn,NM_lab_cn2)
cn2 <- cn_ratio
cn2 <- bind_rows(cn2,cn_ratio)
NM_lab_cn <- cbind(NM_lab_cn,cn2)
NM_CN <- NM_lab_cn[which(NM_lab_cn$Forest_Type=="Norway Maple"),]
#ignition loss (using the preliminary data right now, some more observations need to be added)
NM_lab_ig <- ig_loss %>% select(Sample_ID) %>% mutate(Forest_Type=case_when(str_detect(Sample_ID,'^NM')==T ~ 'Norway Maple',TRUE ~ "N"))
NM_lab_ig <- cbind(NM_lab_ig,ig_loss)
NM_ig <- NM_lab_ig[which(NM_lab_ig$Forest_Type=="Norway Maple"),]
# All NM data combined. Here I have to use merge again because each data frame has a different number of rows.
NM_dat1 <- merge(data.frame(NM_pH, row.names=NULL), data.frame(NM_CN, row.names=NULL), by=0, all=TRUE) [-1]
NM_data <- merge(data.frame(NM_dat1, row.names=NULL), data.frame(NM_ig, row.names=NULL), by=0, all=TRUE) [-1]
# Finally, I will create a data frame per category (i.e. all corrected pH data in one df, all CN together, etc.)
pH <- bind_rows(BL_pH,NF_pH,NM_pH)
CN <- bind_rows(BL_CN,NF_CN,NM_CN)
## New names:
## New names:
## New names:
## • `Name` -> `Name...1`
## • `Name` -> `Name...6`
ign <- bind_rows(BL_ig,NF_ig,NM_ig)
## New names:
## New names:
## New names:
## • `Sample_ID` -> `Sample_ID...1`
## • `Sample_ID` -> `Sample_ID...3`
#creating a data frame with all of the data frames combined. For this, I will have to use the merge function twice.
all_ver1 <- merge(data.frame(pH, row.names=NULL), data.frame(CN, row.names=NULL), by=0, all=TRUE) [-1]
all_data <- merge (data.frame(all_ver1, row.names=NULL), data.frame(ign, row.names=NULL), by=0, all=TRUE) [-1]
#Now, I have data frames containing data for each forest stand, all with a consistent label for each data type, making them easier to work with in my analysis for Week 2.
##Week 2: Boxplots, Range plots, and stat analyses
#########################CN boxplots (CN Ratio by dominant Forest Stand)############################
ggplot(CN,aes(as.factor(Forest_Type),C_N_Ratio))+geom_boxplot(col=c("red","darkgreen","blue"))+labs(x='Forest Type',y='C:N Ratio',title="Carbon Nitrogen (C:N) Ratios in Leaf Litter, by Forest Type")
##EDITS: assign colors to each Forest Type in the box plot, to be used in the range plot below
####################################Horizontal range plot for pH#########################################
library(ggplot2)
#now I am finding the lowest and highest values for each forest stand pH, so I may create my range plot. I was having trouble doing it in a similar way to how I created the boxplot, I keep getting an error message.
lowest_BL <- which.min(BL_pH$pH)
print(lowest_BL)
## [1] 4
#5.21 is the lowest pH in BL
highest_BL <- which.max(BL_pH$pH)
print(highest_BL)
## [1] 8
#8.43 is the highest pH in BL
mean(BL_pH$pH)
## [1] 6.479167
#6.48 is the mean pH
lowest_NF <- which.min(NF_pH$pH)
print(lowest_NF)
## [1] 8
#4.04 is the lowest pH in NF
highest_NF <- which.max(NF_pH$pH)
print(highest_NF)
## [1] 11
#5.57 is the highest pH in NF
mean(NF_pH$pH)
## [1] 4.915
#4.92 is the mean pH
lowest_NM <- which.min(NM_pH$pH)
print(lowest_NM)
## [1] 8
#4.46 is the lowest pH in NM
highest_NM <- which.max(NM_pH$pH)
print(highest_NM)
## [1] 11
#5.95 is the highest pH in NM
mean(NM_pH$pH)
## [1] 5.156667
#5.16 is the mean pH
install.packages(ggplot2)
## Error in install.packages : object 'ggplot2' not found
library(ggplot2)
par(mar = c(1, 19, 1, 19))
d=data.frame(Forest_Type=c("Black Locust","Native Forest","Norway Maple"), Soil_pH=c(6.48,4.92,5.16), lower=c(5.21,4.04,4.46), upper=c(8.43,5.57,5.95))
ggplot() +
geom_pointrange(data=d, mapping=aes(x=Forest_Type , y=Soil_pH, ymin=upper, ymax=lower), width=0.2, size=1, col=c("red","darkgreen","blue"), shape=22) + labs(title="Soil pH by Forest Type")
## Warning in geom_pointrange(data = d, mapping = aes(x = Forest_Type, y = Soil_pH, :
## Ignoring unknown parameters: `width`
##############################Organic Matter from LOI####################################
#creating a new column to calculate percentage of organic matter
ignition <- ign %>% mutate(Per_OM=Loss/Soil_Initial*100)
all_dat <- merge (data.frame(all_data, row.names=NULL), data.frame(ignition, row.names=NULL), by=0, all=TRUE) [-1]
## Warning in merge.data.frame(data.frame(all_data, row.names = NULL),
## data.frame(ignition, : column names 'Forest_Type.x', 'Forest_Type.y' are duplicated
## in the result
ggplot(ignition,aes(as.factor(Forest_Type),Per_OM))+geom_boxplot(col=c("red","darkgreen","blue"))+labs(x='Forest Type',y='Percentage of Organic Matter',title="Soil Organic Matter Content, by Forest Type")
############################Data table##################################
library(tidyverse)
library(RColorBrewer)
install.packages('gtExtras')
## Error in install.packages : Updating loaded packages
library(gtExtras)
plot <- all_dat %>%
rename(`Forest Type` = Forest_Type.x) %>%
group_by(`Forest Type`) %>%
reframe(`Soil pH` = range(pH),
`Percent Organic Matter (Leaf Litter)` = mean(Per_OM),
`Average CN Ratio (Leaf Litter)` = round(mean(C_N_Ratio),digits=2)) %>%
gt() %>%
tab_header(title = 'Soil and Leaf Litter Characteristics in Inwood Hill Park, by Forest Type') %>%
cols_align(align="left")
install.packages("knitr")
## Error in install.packages : Updating loaded packages
library(knitr)
install.packages("rmarkdown")
## Error in install.packages : Updating loaded packages
library(rmarkdown)
options(knitr.duplicate.label = "allow")
#knitr::opts_chunk$set(echo = TRUE)
#rmarkdown::render("Valenza_Final.rmd")
rmarkdown::render("Valenza_Final.rmd")