PHBV Soil Decomposition Genetic Experiment

Documentation

This document is intended to provide transparency and aid in communication between collaborators for the data analysis of the decomposition sensors.

Experiment Setup

The experiment was run in three rounds, referred to as Deployment A, Deployment B, and Deployment C. Each deployment consisted of replicates of live and sterilized soil samples, each with a decomposition sensor. The decomposition sensors were double sided with non-degradable PMMA traces on the back and degradable PHBV traces on the front. Data was collected every 30 minutes with custom res8 data loggers and uploaded to a Google Sheet. Relative humidity and temperature were also measured by the res8 data loggers in the mesocosm.

Deployment A consisted of 3 soil types (Ned, Bet, Garden). Each soil type had 3 replicates of live soil and 3 replicates of sterilized soil. Additionally 3 control replicates were installed empty microcosms (air). This deployment was run for 4 weeks.

Deployment B consisted of 4 soil types (MASS_FO, MASS_LA, KBS_C, KBS_R). Each soil type had 4 replicates of live soil and 2 replicates of sterilized soil. Additionally 2 control replicates were installed empty microcosms (air). This deployment was run for 5 weeks.

Deployment C consisted of 5 soil types (PES, LUF-R, LUF-F, VSP, ESF). Each soil type had 4 replicates of live soil and 2 replicates of sterilized soil. Additionally 2 control replicates were installed empty microcosms (air). This deployment was run for 5 weeks.

Required libraries

require(googlesheets4); require(lubridate); require(reshape)
library(ggplot2); library(ggpubr);library(cowplot); library(RColorBrewer)
library(scales)
library(tidyverse); library(dplyr); library(rstatix)
library(DescTools); library(changepoint)

#Use these functions to point to subfolders for file management
wd_top <- function(){setwd(dirname(rstudioapi::getActiveDocumentContext()$path))}
wd_figures <- function(){wd_top(); setwd(paste(getwd(),"/figures",sep=""))}

Import the Data

# We will do this in sections by each deployment

# Read in and format Deployment A ---------------------------------------------
#You will need to associate this with a Google account to pull in GSheet Data
#Get the first logger's data to initialize the dataframe
sensorIDs <- c("601a","602a","603a","604a","605a","606a")
gData <- read_sheet("https://docs.google.com/spreadsheets/d/1C6eezdOYUUC_DXZprJ553shEB2hF23nA5NrqE2I7rlc", sensorIDs[1])

#Initialize the dataframe
rawData <- data.frame(gData)
remainingSensors <- setdiff(sensorIDs, sensorIDs[1])

#Download the remaining loggers' data
for (theSensor in remainingSensors){
  gData <- read_sheet("https://docs.google.com/spreadsheets/d/1C6eezdOYUUC_DXZprJ553shEB2hF23nA5NrqE2I7rlc", theSensor)
  newData <- data.frame(gData)
  rawData <- rbind(rawData, newData)
}

#Add a column with the deployment letter code
rawData$deployment <- c("a")

rm(gData, newData, sensorIDs, remainingSensors,theSensor) #Clean up

#Pull in Experiment Lookup Table 
lData <- read_sheet("https://docs.google.com/spreadsheets/d/1UIwdcrfeHqmyhnNnboS45xjTZGszJLoxgp2UoDGVagM", "Deployment A")
lookupTableA <- data.frame(lData); rm(lData)

#Rename columns
names(rawData) <- c("dateTime", "device", "battery", "temperature", "rh",
                    "ch1", "ch2", "ch3", "ch4","ch5", "ch6", "ch7", "ch8",
                    "errorCode", "unknown", "deployment")

#Make the timestamps into POSIXct format to make manipulation easier
rawData$dateTime <- as.POSIXct(rawData$dateTime, tz='UTC')

#Convert to Mountain Time
rawData$dateTime <- rawData$dateTime
attr(rawData$dateTime, "tzone") <- "America/Denver"

#Subset to only include data after the experiment started
startTime <- as.POSIXct("2025-04-09 18:00:00 MDT")
rawData <- subset(rawData, dateTime >= startTime)

#Subset to only include data before the experiment ended
endTime <- as.POSIXct("2025-05-09 24:00:00 MDT")
rawData <- subset(rawData, dateTime <= endTime)

#Add a column with the time since the start of the experiment in days
rawData$day <- as.numeric(rawData$dateTime - min(rawData$dateTime), units = "days")

rawData <- select(rawData, -c("errorCode", "unknown")) #Remove unused columns

rawDataA <- rawData #Rename the data frame 



#Read in and format Deployment B ---------------------------------------------
#You will need to associate this with a Google account to pull in GSheet Data
#Get the first logger's data to initialize the dataframe
sensorIDs <- c("601b","602b","603b","604b","605b","606b","607b")
gData <- read_sheet("https://docs.google.com/spreadsheets/d/1C6eezdOYUUC_DXZprJ553shEB2hF23nA5NrqE2I7rlc", sensorIDs[1])

#Initialize the dataframe
rawData <- data.frame(gData)
remainingSensors <- setdiff(sensorIDs, sensorIDs[1])

#Download the remaining loggers' data
for (theSensor in remainingSensors){
  gData <- read_sheet("https://docs.google.com/spreadsheets/d/1C6eezdOYUUC_DXZprJ553shEB2hF23nA5NrqE2I7rlc", theSensor)
  newData <- data.frame(gData)
  rawData <- rbind(rawData, newData)
}

#Add a column with the deployment code
rawData$deployment <- c("b")

rm(gData, newData, sensorIDs, remainingSensors,theSensor) #Clean up

#Pull in Experiment Lookup Table 
lData <- read_sheet("https://docs.google.com/spreadsheets/d/1UIwdcrfeHqmyhnNnboS45xjTZGszJLoxgp2UoDGVagM", "Deployment B")
lookupTableB <- data.frame(lData); rm(lData)

#Rename columns
names(rawData) <- c("dateTime", "device", "battery", "temperature", "rh",
                    "ch1", "ch2", "ch3", "ch4","ch5", "ch6", "ch7", "ch8",
                    "errorCode", "unknown", "deployment")

#Make the timestamps into POSIXct format to make manipulation easier
rawData$dateTime <- as.POSIXct(rawData$dateTime, tz='UTC')

#Convert to Mountain Time
rawData$dateTime <- rawData$dateTime
attr(rawData$dateTime, "tzone") <- "America/Denver"

#Subset to only include data after the experiment started
startTime <- as.POSIXct("2025-08-29 19:00:00 MDT")
rawData <- subset(rawData, dateTime >= startTime)

#Subset to only include data before the experiment ended
endTime <- as.POSIXct("2025-10-03 24:00:00 MDT")
rawData <- subset(rawData, dateTime <= endTime)

#Add a column with the time since the start of the experiment in days
rawData$day <- as.numeric(rawData$dateTime - min(rawData$dateTime), units = "days")

rawData <- select(rawData, -c("errorCode", "unknown")) #Remove unused columns

rawDataB <- rawData #Rename the data frame 



# Read in and format Deployment C ----------------------------------------------
#You will need to associate this with a Google account to pull in GSheet Data
#Get the first logger's data to initialize the dataframe
sensorIDs <- c("601c","602c","603c","604c","605c","606c","607c","608c")
gData <- read_sheet("https://docs.google.com/spreadsheets/d/1C6eezdOYUUC_DXZprJ553shEB2hF23nA5NrqE2I7rlc", sensorIDs[1])

#Initialize the dataframe
rawData <- data.frame(gData)
remainingSensors <- setdiff(sensorIDs, sensorIDs[1])

#Download the remaining loggers' data
for (theSensor in remainingSensors){
  gData <- read_sheet("https://docs.google.com/spreadsheets/d/1C6eezdOYUUC_DXZprJ553shEB2hF23nA5NrqE2I7rlc", theSensor)
  newData <- data.frame(gData)
  rawData <- rbind(rawData, newData)
}

#Add a column with the deployment code
rawData$deployment <- c("c")

rm(gData, newData, sensorIDs, remainingSensors,theSensor) #Clean up

#Pull in Experiment Lookup Table
lData <- read_sheet("https://docs.google.com/spreadsheets/d/1UIwdcrfeHqmyhnNnboS45xjTZGszJLoxgp2UoDGVagM", "Deployment C")
lookupTableC <- data.frame(lData); rm(lData)

#Rename columns
names(rawData) <- c("dateTime", "device", "battery", "temperature", "rh",
                    "ch1", "ch2", "ch3", "ch4","ch5", "ch6", "ch7", "ch8",
                    "errorCode", "unknown", "deployment")

#Make the timestamps into POSIXct format to make manipulation easier
rawData$dateTime <- as.POSIXct(rawData$dateTime, tz='UTC')

#Convert to Mountain Time
rawData$dateTime <- rawData$dateTime
attr(rawData$dateTime, "tzone") <- "America/Denver"

#Subset to only include data after the experiment started
startTime <- as.POSIXct("2025-10-17 10:00:00 MDT")
rawData <- subset(rawData, dateTime >= startTime)

#Subset to only include data before the experiment ended
endTime <- as.POSIXct("2025-11-22 24:00:00 MDT")
rawData <- subset(rawData, dateTime <= endTime)

#Add a column with the time since the start of the experiment in days
rawData$day <- as.numeric(rawData$dateTime - min(rawData$dateTime), units = "days")

rawData <- select(rawData, -c("errorCode", "unknown")) #Remove unused columns

rawDataC <- rawData #Rename the data frame 

#Merge the data frames from each deployment 
rawDataAll <- rbind(rawDataA, rawDataB, rawDataC)
lookupTableAll <- rbind(lookupTableA, lookupTableB, lookupTableC)

#Remove data frames to cleanup workspace
rm(lookupTableA, lookupTableB, lookupTableC, rawData, rawDataA, rawDataB, rawDataC)

Initial Data Formatting

# Transform the Data ------------------------------------------------------
res8 <- melt(rawDataAll, id = c("dateTime","day","device","deployment"), value.name="reading")

names(res8) <- c("dateTime","day","device","deployment","sensorKind","reading")

####Export the raw data to a csv file####
wd_top(); write.csv(res8, file='res8.csv')

# Import the res8 dataframe -----------------------------------------------
wd_top(); res8 <- read.csv('res8.csv') #Import .csv file
res8 <- select(res8, c('dateTime','day','device','deployment','sensorKind','reading'))

#Set variable types
res8$sensorKind <- as.factor(res8$sensorKind)
res8$device <- as.factor(res8$device)
res8$deployment <- as.factor(res8$deployment)
res8$reading <- as.numeric(res8$reading)
res8$dateTime <- as.POSIXct(res8$dateTime)



####Split the data into two dataframes####
resSensors <- c("ch1","ch2","ch3","ch4","ch5","ch6","ch7","ch8")
diagnostics <- subset(res8, !(sensorKind %in% resSensors))
res8 <- subset(res8, sensorKind %in% resSensors)
names(res8) <- c("dateTime", "day", "device", "deployment", "sensor", "reading")

Plot the Diagnostic Data

Temperature, relative humidity, and battery voltage were measured through each deployment with the res8 data loggers inside the incubator.

#Diagnostic plots seperated by time
tempPlot <- ggplot(data=subset(diagnostics,sensorKind=="temperature"),
                   aes(dateTime,reading,fill=device)) + theme_bw() +
  geom_point(col="black",shape=21) + xlab("") + ylab("Temperature (C)") +
  scale_fill_brewer(palette = "Set3") +
  ggtitle("Logger Diagnostics") + theme(plot.title = element_text(hjust=0.5))

rhPlot <- ggplot(data=subset(diagnostics,sensorKind=="rh"),
                 aes(dateTime,reading,fill=device)) + theme_bw() +
  scale_fill_brewer(palette = "Set3") +
  geom_point(col="black",shape=21) + 
  xlab("") + ylab("Relative Humidity (%)")

battPlot <- ggplot(data=subset(diagnostics,sensorKind=="battery"),
                   aes(dateTime,reading,fill=device)) + theme_bw() + 
  scale_fill_brewer(palette = "Set3") +
  ylim(3.2, 4.2) +
  geom_point(col="black",shape=21) + 
  xlab("") + ylab("Battery Level (V)")

diagPlot <- ggarrange(tempPlot, rhPlot, battPlot, ncol=1, nrow=3, align = "hv",
                      common.legend=TRUE,legend="right")
rm(tempPlot, rhPlot, battPlot,resSensors) #Clean up the workspace

diagPlot

#Diagnostic plots with days aligned
diagnostics$deviceDeployment <- paste(diagnostics$device,
                                      diagnostics$deployment)

tempPlot <- ggplot(data=subset(diagnostics,sensorKind=="temperature"),
                   aes(day,reading,color=device)) + theme_bw() +
  geom_point(shape=20,size=1) + xlab("") + ylab("Temperature (C)") +
  scale_color_brewer(palette = "Set1") +
  ggtitle("Logger Diagnostics by Deployment") + theme(plot.title = element_text(hjust=0.5)) +
  facet_grid(cols=vars(deployment))

rhPlot <- ggplot(data=subset(diagnostics,sensorKind=="rh"),
                 aes(day,reading,color=device)) + theme_bw() +
  scale_color_brewer(palette = "Set1") +
  geom_point(shape=20,size=1) + 
  xlab("") + ylab("Relative Humidity (%)")+
  facet_grid(cols=vars(deployment))

battPlot <- ggplot(data=subset(diagnostics,sensorKind=="battery"),
                   aes(day,reading,color=device)) + theme_bw() + 
  scale_color_brewer(palette = "Set1") +
  ylim(3.2, 4.2) +
  geom_point(shape=20,size=1) + 
  xlab("") + ylab("Battery Level (V)")+
  facet_grid(cols=vars(deployment))

diagPlotAligned <- ggarrange(tempPlot, rhPlot, ncol=1, nrow=2, align = "hv",
                      common.legend=TRUE,legend="right")
rm(tempPlot, rhPlot, battPlot,resSensors) #Clean up the workspace

diagPlotAligned

#wd_figures()
ggsave(plot=diagPlotAligned, filename="diagPlot.png",height=9,width=12,dpi=300,
       bg="white")

Remove Known Sensors

During the experiment, one soil microcosm was accidentally contaminated, thus we will remove that sensor from the data set. We also ran a short experiment in compost tea, and we will remove those sensors from the data set as well.

# Now let's start cleaning up the PHBV sensor data
res8 <- merge(res8,lookupTableAll,by=c("device","deployment","sensor"))

res8$replicate <- as.numeric(res8$replicate)
res8$treatment <- as.factor(paste(res8$soil_type,res8$soil_treatment))

## Remove the worm tea traces for this analysis. Replicate [97, 108]
res8 <- res8 %>%
  filter(!soil_type %in% c("WT"))

## We know that KBS-R St1 was contaminated with live soil.(replicate 59 and 60)
res8 <- res8 %>%
  filter(replicate != 59)
res8 <- res8 %>%
  filter(replicate != 60)

## Remove open circuit resistance values
res8 <- res8 %>% 
  filter(reading != 2147483647)

# Normalize Resistance Data -----------------------------------------------
#This shows how to do a calculation that is important for our resistive sensors:
#We will take any resistance readings that fall between the variables rStart and
#rEnd, and calculate the initial resistance for each sensor, then put those
#initial resistances back in the main res8 dataframe:
rStart <- min(res8$day) + 0.5
rEnd <- rStart + 1

#Get initial resistances on a per-sensor basis
r0Data <- res8[res8$day <= rEnd & res8$day >= rStart,]
r0s <- r0Data %>% group_by(sensor) %>%
  dplyr::summarize(Ro = mean(reading, na.rm=T))

res8 <- merge(res8, r0s, by="sensor") #Merge Ro values with the dataframe
res8$R_norm <- res8$reading/res8$Ro #Calculate the normalized resistances
rm(r0Data, r0s) #Clean up the workspace

Plot Raw Sensor Data

#Define the color palette for soil types
soilPalette <- c("BLANK" = "grey", "PES" = "purple4", "LUF-R" = "darkgreen",
                 "ESF" = "brown4", "VSP" = "blue4", "LUF-F"="orange3", 
                 "MASS_FO" = "purple1", "MASS_LA" = "green1",
                 "KBS_C" = "brown1", "KBS_R" = "blue1",
                 "Bat" = "purple3", "Garden" = "turquoise1",
                 "Ned" = "red4")

####Show the raw data with the Ro window shown with dashed lines####
ggplot(data=res8, aes(x=day, y=reading, col=soil_type)) +
  theme_bw() +  geom_point(shape=20) +
  scale_color_manual(name="Soil Type", values=soilPalette) +
  xlab("") + ylab("Sensor Resistance (\u03a9)") + 
  ggtitle("Raw Readings") + theme(plot.title = element_text(hjust=0.5)) +
  geom_vline(xintercept = rStart,color="darkgreen", linetype="dashed") +
  geom_vline(xintercept = rEnd,color="darkred", linetype="dashed") +
  ylim(0,2500) + 
  facet_wrap(~sensor_type + soil_treatment)

Find Bad (Noisy) Sensors

Occasionally, sensors have a loose connection or mechanical damage that causes the sensor reading to be extremely noisy. We can find any damaged sensors by setting a noisy threshold and remove those sensors from the data set. This method can also flag sensors that are noisy near their end of life from degradation, so we will need to double check each of the flagged sensors to determine which should be removed.

#Let's show which sensors are bad based on a noise threshold
#First get some stats on the noise in the sensors from one reading to the next
noise_stats <- res8 %>%
  arrange(replicate, day) %>%
  group_by(replicate) %>%
  mutate(step_change = abs(R_norm - lag(R_norm))) %>%
  summarise(
    mean_noise   = mean(step_change, na.rm = TRUE),
    median_noise = median(step_change, na.rm = TRUE),
    p95_noise    = quantile(step_change, 0.95, na.rm = TRUE)
  )

#noise_stats #Output the noise stats

threshold <- quantile(noise_stats$median_noise, 0.99) #Set a noise threshold

#Define sensors that exceed the noise threshold as "bad sensors"
bad_sensors <- noise_stats %>%
  filter(mean_noise > threshold) %>%
  pull(replicate)

bad_sensors
## [1]  12  42 145
#Let's visually check what these sensor signals look like
ggplot(data=res8 %>% filter(replicate %in% bad_sensors),
       aes(x = day, y = R_norm, color = factor(replicate))) +
  geom_line() +
  labs(color = "replicate") +
  theme_minimal()

#Two of the sensors have very large responses (>50 normalized resistance). 
#It's very normal for the sensors to start to act noisy when they are severely 
#degraded so these sensors are not a concern

#Let's zoom in to check the third sensor
ggplot(data=res8 %>% filter(replicate %in% bad_sensors),
       aes(x = day, y = R_norm, color = factor(replicate))) +
  geom_line() +
  labs(color = "replicate") +
  ylim(0,10) +
  theme_minimal()

#This sensor actually looks extremely noisy even at low response (<10 normalized resistance).
#So we will remove this sensor from the data set


#Remove the only bad sensor (VSP PMMA Live from deployment C, replicate 145)
#Also remove the corresponding PHBV sensor
res8 <- res8 %>%
  filter(replicate != 145)
res8 <- res8 %>%
  filter(replicate != 146)

Changepoint Analysis

Changepoint analysis is useful for determining generally when degradation began to aid in our selection of a decomposition window.

# Changepoint Detection ---------------------------------------------------
####LIVE PHBV SENSORS DATASET CHANGEPOINT DETECTION####
#Get hourly averages across the whole dataframe 
break_points <- seq(from = 0, to = 35, by = 1/24)

#Let's only use the data from PHBV Live sensors
phbvLive <- res8 %>%
  filter(soil_treatment %in% c("Live") & sensor_type %in% c("PHBV"))

# Also Rnorm is R_norm
hourData <- phbvLive %>% 
  mutate(hourCuts = cut(day, breaks=break_points)) %>% group_by(hourCuts) %>%
  dplyr::summarize(Rnorm_ave = mean(R_norm, na.rm=T)) %>% mutate(rowNum = row_number())

#Use changepoint finder to break into pre- and post-response segments:
cPointFull <- cpt.var(diff(hourData$Rnorm_ave),method="BinSeg",Q=3)
decompEnd <- hourData$hourCuts[hourData$rowNum[min(cPointFull@cpts)]]
decompWindowStart <- cPointFull@cpts[1]/24 #Day that decomp started by changepoint

#Changepoint plot for FULL AVERAGED DATASET
ggplot() + theme_bw() +
  geom_point(data=hourData, aes(x=rowNum,y=Rnorm_ave)) +
  geom_col(aes(x=cPointFull@cpts[1],y=10),width=1,fill="darkred") +
  ylim(0,10)

# Based on this analysis, the decomposition response started around day 10

Data Analysis

Here we select day 21 to day 35 as the decomposition analysis window.

We background subtract the abiotic signal by subtracting the nondegradable PMMA signal from the biodegradable PHBV signal (Rnorm PHBV - Rnorm PMMA). This becomes the signal that we will analyze.

# Define Analysis Window -------------------------------------------------
res8$window <- "NA"

#Assign a window to calculate mean response from based on changepoint and garden sensor failure
#We selected the last 2 weeks of the experiment
windowStart <- 21
windowEnd <-35
res8[res8$day >= windowStart &
       res8$day <= windowEnd,]$window <- "Day 21-35" 

res8$treatment_type <- paste(res8$sensor_type, res8$soil_treatment)


# Abiotic Background Signal Subtraction -------------------------------------------

#We only care about the sensor signal that is due to biotic degradation, but we 
#know that the sensors will respond to certain abiotic factors as well (soil 
#moisture, temperature, conductivity, etc.). We want to subtract out the sensor 
#responses from abiotic factors. To do this we will subtract the sensor signal 
#of the non-biodegradable PMMA from the biodegradable PHBV sensor signal.

#Group the replicates into pairs of front PHBV/back PMMA for each soil sample
#and subtract the normalized resistance (PHBV Rnorm - PMMA Rnorm)
res8sub <- res8 %>%
  mutate(pair = ceiling(replicate / 2)) %>%
  group_by(pair, soil_type, soil_treatment,day,window,dateTime) %>%
  summarise(
    R_norm = R_norm[replicate %% 2 == 0] - R_norm[replicate %% 2 == 1]
  )

res8sub$dateTime <- as.POSIXct(res8sub$dateTime)
res8sub$soil_treatment <- as.factor(res8sub$soil_treatment)
res8sub$soil_type <- as.factor(res8sub$soil_type)

# Plot All Sensors -------------------------------------------------

#Define the color palette for sensor types
soilPalette <- c("BLANK" = "black", "PES" = "purple4", "LUF-R" = "darkgreen",
                 "ESF" = "brown4", "VSP" = "blue4", "LUF_F"="orange3",
                 "MASS_FO" = "purple1", "MASS_LA" = "green1",
                 "KBS_C" = "brown1", "KBS_R" = "blue1",
                 "Bat" = "purple3", "Garden" = "turquoise1",
                 "Ned" = "red4")
treatmentPalette <- c("PHBV BLANK" = "grey", "PMMA BLANK" = "grey",
                      "PHBV Live" = "orange", "PHBV Sterile" = "lightblue",
                      "PMMA Live" = "red3", "PMMA Sterile" = "blue3")


#Let's try some different individual plot appearances
res8sub <- subset(res8sub, soil_type != "BLANK") #Remove BLANK sensors

#All traces, all time
indPlot <- ggplot() + theme_bw() +
  geom_point(data=res8sub,
             aes(day,R_norm,col=soil_type,fill=soil_type), 
             size=.5,show_guide = FALSE) +
  ylab(expression("Normalized Resistance (R/R"[o]*")")) +
  scale_color_manual(name="Soil Type", values = soilPalette) +
  scale_fill_manual(name="Treatment Type", values = treatmentPalette) +
  xlim(0, max(res8sub$day)) + xlab("") + 
  theme(legend.position = "none") + ylim(-1,5) +
  facet_grid(rows=vars(soil_treatment),cols=vars(soil_type))
show(indPlot)

#It looks like the garden soil will need to be on a different scale
#Also let's put the Live and Sterile on the same axis for easy comparison

Plot Garden Soil on Seperate Axis

soil_treatment <- c("Live" = "darkblue",
                    "Sterile" = "darkred")

#Plot non-Garden soil traces
indPlot <- ggplot() + theme_bw() +
  geom_point(data=res8sub %>% filter(soil_type != "Garden"),
             aes(day,R_norm,col=soil_treatment,fill=soil_type), 
             size=.5,show_guide = FALSE) +
  geom_vline(xintercept = windowStart,color="black", linetype="dashed") +
  geom_vline(xintercept = windowEnd,color="black", linetype="dashed") +
  ylab(expression("R/R"[o]*" PHBV - R/R"[o]*" PMMA")) +
  scale_color_manual(name="Soil Type", values = soil_treatment) +
  theme(legend.position = "right", axis.text.x = element_text(angle =0, hjust = 1)) + 
  ylim(-2,5) +
  facet_wrap(~soil_type, nrow=2)
show(indPlot)

#Plot only Garden soil traces
indPlotGarden <- ggplot() + theme_bw() +
  geom_point(data=res8sub %>% filter(soil_type == "Garden"),
             aes(day,R_norm,col=soil_treatment,fill=soil_type), 
             size=.5,show_guide = FALSE) +
  geom_vline(xintercept = windowStart,color="black", linetype="dashed") +
  geom_vline(xintercept = windowEnd,color="black", linetype="dashed") +
  ylab(expression("R/R"[o]*" PHBV - R/R"[o]*" PMMA")) +
  scale_color_manual(name="Soil Type", values = soil_treatment) +
  theme(legend.position = "right", axis.text.x = element_text(angle =0, hjust = 1)) + 
  ylim(-3,80) +
  facet_wrap(~soil_type)
show(indPlotGarden)

Mean Responses

To obtain a single response value for each sensor, we take the mean value of (Rnorm PHBV - Rnorm PMMA) in the 21 to 35 day window. The 3 soil types with very large responses are Garden, KBS_C, and LUF-R.

#Now we will quntify the responses with the mean response in the defined window 
#(last 2 weeks of the experiment)

# Calculate Mean Responses ----------------------------------------------------
#First make the summary dataframe for the defined windows
windowMean <- res8sub %>% group_by(pair,window,soil_type, soil_treatment) %>%
  dplyr::summarize(phbvSubPmmaMean = mean(R_norm, na.rm=T))

# Plot Mean Responses ---------------------------------------------------------
#Plot the mean responses in the analysis window (last 2 weeks) excluding Garden soil
windowsMeanPlot <- ggplot(data=windowMean %>% filter(window != "NA") 
                             %>% filter(soil_type != "Garden")) + 
  theme_bw() +
  geom_point(aes(y=phbvSubPmmaMean, col=soil_type, shape=soil_treatment, 
                 x = reorder(soil_type, phbvSubPmmaMean)),
             position = position_jitterdodge(jitter.width = 0.4, dodge.width=0.5), 
             alpha = 0.8, size=3) +
  scale_color_manual(name = "soil type", values = soilPalette) +
  xlab("") + ylab(bquote("Mean (R/R"[o]*" PHBV - R/R"[o]*" PMMA)")) + 
  facet_wrap(~window, nrow=3) +  ylim(-1.5,8) +
  theme(legend.position = "right", axis.text.x = element_text(angle = 45, hjust = 1))
show(windowsMeanPlot)

#Plot the mean responses in the analysis window (last 2 weeks) only including Garden soil
windowsMeanPlotGarden <- ggplot(data=windowMean %>% filter(window != "NA") 
                          %>% filter(soil_type == "Garden")) + 
  theme_bw() +
  geom_point(aes(y=phbvSubPmmaMean, col=soil_type, shape=soil_treatment, 
                 x = reorder(soil_type, phbvSubPmmaMean)),
             position = position_jitterdodge(jitter.width = 0.4, dodge.width=0.5), 
             alpha = 0.7, size=3) +
  scale_color_manual(name = "soil type", values = soilPalette) +
  xlab("") + ylab(bquote("Mean (R/R"[o]*" PHBV - R/R"[o]*" PMMA)")) + 
  facet_wrap(~window, nrow=3) +  ylim(-1.5,40) +
  theme(legend.position = "right", axis.text.x = element_text(angle = 45, hjust = 1))
show(windowsMeanPlotGarden)

Arrange Summary Plot

#Arrange the plots into a single combo plot
tracesPlot <- ggarrange(indPlot,indPlotGarden,nrow=1,ncol=2,widths = c(5, 1.5),align="hv",
                        labels=c("(a)","(b)"))
meanPlot <- ggarrange(windowsMeanPlot,windowsMeanPlotGarden,nrow=1,ncol=2,widths = c(5, 1.5),align="hv",
                        labels=c("(c)","(d)"))
comboPlot <- ggarrange(tracesPlot,meanPlot,nrow=2,ncol=1, align="hv")
show(comboPlot)

#Save the combo plot as a .PNG
ggsave(plot=comboPlot, filename="comboPlot.png",width=16,height=12,dpi=300)