Whiting / Fierer Soil Decomposition Genetics Experiment 1

Experimental and Data Analysis Documentation

This R Publication is intended to provide updates on current and ongoing data analysis to the researches involved in the soil decomposition sensor / microbial genetic analysis experiment. Using this approach provides transparency regarding the data analysis methods used and improves communication between researchers.

Experimental Setup

We have 3 soil types, Nederland, Betasso, and Garden. Each soil type has 3 replicates of sterile and live mesocosms.

In each mescocosm, we have installed a double-sided decomposition sensor. One side has a printed PHBV bioplastic substrate, and the other has a printed PMMA (acrylic) substrate.

We also have 3 replicates where we have installed sensors in empty mesocosms (air).

Sensor resistance values were monitored by the BEEM Lab’s custom WiFi res8 dataloggers. Dataloggers uploaded readings to a Google Sheet every 30 minutes, alongside diagnostic data to monitor the incubator’s temperature and relative humidity.

# Required libraries ------------------------------------------------------
require(googlesheets4); require(lubridate); require(reshape)
library(ggplot2); library(ggpubr);library(cowplot); library(RColorBrewer)
library(scales)
library(tidyverse); library(dplyr); library(rstatix)

#Use these functions to point to subfolders for file management
wd_top <- function(){setwd(dirname(rstudioapi::getActiveDocumentContext()$path))}
wd_figures <- function(){wd_top(); setwd(paste(getwd(),"/figures",sep=""))}

Import data from Google Sheets

#You will need to associate this with a Google account to pull in GSheet Data
#Get the first logger's data to initialize the dataframe
sensorIDs <- c("601","602","603","604","605","606")
gData <- read_sheet("https://docs.google.com/spreadsheets/d/1GExCnPKsCB4zQ3B5TVzC9i2XAlUwAhJOvr8-tT9qSP8", sensorIDs[1])

#Initialize the dataframe
rawData <- data.frame(gData)
remainingSensors <- setdiff(sensorIDs, sensorIDs[1])

#Download the remaining loggers' data
for (theSensor in remainingSensors){
  gData <- read_sheet("https://docs.google.com/spreadsheets/d/1GExCnPKsCB4zQ3B5TVzC9i2XAlUwAhJOvr8-tT9qSP8", theSensor)
  newData <- data.frame(gData)
  rawData <- rbind(rawData, newData)
}

rm(gData, newData, sensorIDs, remainingSensors,theSensor) #Clean up

# Pull in Experiment Lookup Table -----------------------------------------
lData <- read_sheet("https://docs.google.com/spreadsheets/d/1UIwdcrfeHqmyhnNnboS45xjTZGszJLoxgp2UoDGVagM")

lookupTable <- data.frame(lData); rm(lData)

#Rename columns
names(rawData) <- c("dateTime", "device", "battery", "temperature", "rh",
                    "ch1", "ch2", "ch3", "ch4","ch5", "ch6", "ch7", "ch8",
                    "errorCode", "unknown")

#Make the timestamps into POSIXct format to make manipulation easier
rawData$dateTime <- as.POSIXct(rawData$dateTime, tz=UTC)

#Convert to Mountain Time
rawData$dateTime <- rawData$dateTime
attr(rawData$dateTime, "tzone") <- "America/Denver"

#Subset to only include data after the experiment started
startTime <- as.POSIXct("2025-04-09 18:00:00 MDT")
rawData <- subset(rawData, dateTime >= startTime)

#Subset to only include data before the experiment ended
endTime <- as.POSIXct("2025-05-09 24:00:00 MDT")
rawData <- subset(rawData, dateTime <= endTime)

rawData <- select(rawData, -c("errorCode", "unknown")) #Remove unused columns

Initial Data Processing

# Transform the Data ------------------------------------------------------
res8 <- melt(rawData, id = c("dateTime","device"), value.name="reading")

names(res8) <- c("dateTime","device","sensorKind","reading")

####Export the raw data to a csv file####
wd_top(); write.csv(res8, file='res8.csv')

# Import the res8 dataframe -----------------------------------------------
wd_top(); res8 <- read.csv('res8.csv') #Import .csv file
res8 <- select(res8, c('dateTime','device','sensorKind','reading'))

#Set variable types
res8$sensorKind <- as.factor(res8$sensorKind)
res8$device <- as.factor(res8$device)
res8$reading <- as.numeric(res8$reading)
res8$dateTime <- as.POSIXct(res8$dateTime)

####Split the data into two dataframes####
resSensors <- c("ch1","ch2","ch3","ch4","ch5","ch6","ch7","ch8")
diagnostics <- subset(res8, !(sensorKind %in% resSensors))
res8 <- subset(res8, sensorKind %in% resSensors)
names(res8) <- c("dateTime", "device", "sensor", "reading")

Plot the Diagnostic Data

These are data collected on-board each res8 Datalogger.

tempPlot <- ggplot(data=subset(diagnostics,sensorKind=="temperature"),
                   aes(dateTime,reading,fill=device)) + theme_bw() +
  geom_point(col="black",shape=21) + xlab("") + ylab("Temperature (C)") +
  scale_fill_brewer(palette = "Set3") +
  ggtitle("Logger Diagnostics") + theme(plot.title = element_text(hjust=0.5))

rhPlot <- ggplot(data=subset(diagnostics,sensorKind=="rh"),
                   aes(dateTime,reading,fill=device)) + theme_bw() +
  scale_fill_brewer(palette = "Set3") +
  geom_point(col="black",shape=21) + 
  xlab("") + ylab("Relative Humidity (%)")

battPlot <- ggplot(data=subset(diagnostics,sensorKind=="battery"),
                   aes(dateTime,reading,fill=device)) + theme_bw() + 
  scale_fill_brewer(palette = "Set3") +
  ylim(3.2, 4.2) +
  geom_point(col="black",shape=21) + 
  xlab("") + ylab("Battery Level (V)")
  
diagPlot <- ggarrange(tempPlot, rhPlot, battPlot, ncol=1, nrow=3, align = "hv",
          common.legend=TRUE,legend="right")
rm(tempPlot, rhPlot, battPlot,resSensors) #Clean up the workspace

diagPlot

wd_figures()
ggsave(plot=diagPlot, filename="diagPlot.png",height=15,width=20,dpi=300,
       bg="white")

Data Processing

res8 <- merge(res8,lookupTable,by=c("device","sensor"))

#Rename soil types
res8[res8$soil_type=="Ned",]$soil_type <- "Nederland"
res8[res8$soil_type=="Bat",]$soil_type <- "Betasso"

res8$replicate <- as.factor(res8$replicate)

res8$treatment <- as.factor(paste(res8$soil_type,res8$soil_treatment))

# Normalize Resistance Data -----------------------------------------------
#This shows how to do a calculation that is important for our resistive sensors:
#We will take any resistance readings that fall between the variables rStart and
#rEnd, and calculate the initial resistance for each sensor, then put those
#initial resistances back in the main res8 dataframe:
rStart <- min(res8$dateTime) + hours(12)
rEnd <- rStart + days(1)

#Get initialial resistances on a per-sensor basis
r0Data <- res8[res8$dateTime <= rEnd & res8$dateTime >= rStart,]
r0s <- r0Data %>% group_by(sensor) %>%
  dplyr::summarize(Ro = mean(reading, na.rm=T))

res8 <- merge(res8, r0s, by="sensor") #Merge Ro values with the dataframe
res8$R_norm <- res8$reading/res8$Ro #Calculate the normalized resistances
rm(r0Data, r0s) #Clean up the workspace

Plot Raw Resistance Data

#Define the color palette for soil types
soilPalette <- c("BLANK" = "grey", "Betasso" = "purple4", "Garden" = "darkgreen",
                 "Nederland" = "brown4")

####Show the raw data with the Ro window shown with dashed lines####
ggplot(data=res8, aes(x=dateTime, y=reading, fill=soil_type)) +
  theme_bw() +  geom_point(shape=21, col="black") +
  scale_fill_manual(name="Soil Type", values=soilPalette) +
  xlab("") + ylab("Sensor Resistance (\u03a9)") + 
  ggtitle("Raw Readings") + theme(plot.title = element_text(hjust=0.5)) +
  geom_vline(xintercept = rStart,color="darkgreen", linetype="dashed") +
  geom_vline(xintercept = rEnd,color="darkred", linetype="dashed") +
  ylim(0,10000) + 
  facet_wrap(~sensor_type + soil_treatment)

# Plot Normalized Resistance ----------------------------------------------
ggplot(data=res8, aes(x=dateTime, y=R_norm, fill=soil_type)) + 
  theme_bw() +
  geom_point(col="black",shape=21,alpha=0.75,size=2) +
  xlab("") + ylab(expression("Normalized Resistance (R/R"[o]*")")) + 
  ggtitle("Normalized Readings") + theme(plot.title = element_text(hjust=0.5)) +
  xlim(rEnd, max(res8$dateTime)) + ylim(0.25, 5) + 
  scale_fill_manual(name="Soil Type", values = soilPalette) +
  ylim(0,12) + 
  facet_wrap(~sensor_type + soil_treatment)

#Output the data as a .csv file
write.csv(res8,file='res8.csv')

Plot Averaged Signals

# Process average signals -------------------------------------------------
res8$treatment_type <- paste(res8$sensor_type, res8$soil_treatment)

aveData <- res8 %>% 
  mutate(dateTime = cut(dateTime, breaks='2 hour')) %>% 
  group_by(treatment_type, soil_type, dateTime) %>%
  dplyr::summarize(stDev = sd(R_norm), R_norm = mean(R_norm, na.rm=T))

aveData$dateTime <- as.POSIXct(aveData$dateTime)
aveData$treatment_type <- as.factor(aveData$treatment_type)
aveData$soil_type <- as.factor(aveData$soil_type)

####Plot facet grid####

#Define the color palette for sensor types
soilPalette <- c("BLANK" = "black", "Betasso" = "purple4", 
                 "Garden" = "darkgreen","Nederland" = "brown4")
treatmentPalette <- c("PHBV BLANK" = "grey", "PMMA BLANK" = "grey",
                      "PHBV Live" = "orange", "PHBV Sterile" = "lightblue",
                      "PMMA Live" = "red3", "PMMA Sterile" = "blue3")

####Plot average sensor signals####
aveData <- subset(aveData, treatment_type != "PHBV BLANK")
aveData <- subset(aveData, treatment_type != "PMMA BLANK")

avePlot <- ggplot() + theme_bw() +
  geom_ribbon(data=aveData, aes(x=dateTime,
                                ymax=R_norm+stDev,ymin=R_norm-stDev,
                                col=soil_type,fill=treatment_type),alpha=0.25) +
  geom_point(data=aveData,
             aes(dateTime,R_norm,col=soil_type,fill=treatment_type), 
             size=2,show_guide = FALSE) +
  ylab(expression("Normalized Resistance (R/R"[o]*")")) +
  scale_color_manual(name="Soil Type", values = soilPalette) +
  scale_fill_manual(name="Treatment Type", values = treatmentPalette) +
  xlim(rStart, max(aveData$dateTime)) + xlab("") + 
  theme(legend.position = "none") +
  facet_grid(rows=vars(soil_type),cols=vars(treatment_type)) +
  ylim(0,4)

show(avePlot)

#Save the figure to OneDrive
wd_figures()
ggsave(plot=avePlot, filename="avePlot.png",height=7.5,width=10,dpi=150,
       bg="white")

Plot Individual Sensor Signals

res8 <- subset(res8, treatment_type != "PHBV BLANK")
res8 <- subset(res8, treatment_type != "PMMA BLANK")

indPlot <- ggplot() + theme_bw() +
  geom_point(data=res8,
             aes(dateTime,R_norm,col=soil_type,fill=treatment_type), 
             size=.5,show_guide = FALSE) +
  ylab(expression("Normalized Resistance (R/R"[o]*")")) +
  scale_color_manual(name="Soil Type", values = soilPalette) +
  scale_fill_manual(name="Treatment Type", values = treatmentPalette) +
  xlim(rStart, max(aveData$dateTime)) + xlab("") + 
  theme(legend.position = "none") + ylim(0,10) +
  facet_grid(rows=vars(soil_type),cols=vars(treatment_type))

show(indPlot)

#Save the figure to OneDrive
wd_figures()
ggsave(plot=indPlot, filename="indPlot.png",height=7.5,width=10,dpi=150,
       bg="white")

Plot long time series based on treatment types

aveData$treatment <- paste(aveData$soil_type, aveData$treatment_type)

#Define the color palette for treatment types

typePalette <- c("Betasso PHBV Live" = "purple4",
                 "Betasso PHBV Sterile" = "purple3",
                 "Betasso PMMA Live" = "purple2",
                 "Betasso PMMA Sterile" = "purple1",
                 "Garden PHBV Live" = "green4",
                 "Garden PHBV Sterile" = "green3",
                 "Garden PMMA Live" = "green2",
                 "Garden PMMA Sterile" = "green1",
                 "Nederland PHBV Live" = "brown4",
                 "Nederland PHBV Sterile" = "brown3",
                 "Nederland PMMA Live" = "brown2",
                 "Nederland PMMA Sterile" = "brown1")

topPlot <- ggplot() + theme_bw() +
  geom_point(data=aveData,
             aes(dateTime,R_norm,col=treatment,fill=treatment,
                 shape=treatment_type),size=1) +
  ylab(expression("Normalized Resistance (R/R"[o]*")")) +
  scale_color_manual(name="Treatment", values = typePalette) +
  scale_fill_manual(name="Treatment", values = typePalette) +
  guides(shape="none") +
  xlim(rStart, max(aveData$dateTime)) + xlab("") + 
  theme(legend.position = "right") + ylim(0,2.5) +
  facet_grid(rows=vars(soil_type))

bottomPlot <- ggplot() + theme_bw() +
  geom_point(data=subset(aveData,treatment=="Garden PHBV Live"),
             aes(dateTime,R_norm,col=treatment,fill=treatment), 
             size=2) +
  ylab(expression("Normalized Resistance (R/R"[o]*")")) +
  scale_color_manual(name="Treatment", values = typePalette) +
  scale_fill_manual(name="Treatment", values = typePalette) +
  xlim(rStart, max(aveData$dateTime)) + xlab("") + 
  theme(legend.position = "none") + ylim(0,20)

bigPlot <- ggarrange(topPlot, bottomPlot, nrow=2, ncol=1,
                       align="hv")

show(bigPlot)

wd_figures()
ggsave(plot=bigPlot, filename="bigPlot.png", height=8,width=12,
       dpi=150, bg="white")

Calculate and display sensor slopes

#Get number of days since start of experiment
res8$tDay <- as.numeric(res8$dateTime - rEnd)/60/24

res8$window <- "NA"

#Assign a window to get slopes within
res8[res8$dateTime >= rEnd &
       res8$dateTime <= as.POSIXct("2025-05-01"),]$window <- "Long Window"

windowData <- subset(res8, window != "NA")

#Get individual sensor slopes                     
slopesWindow <- windowData %>% 
  group_by(replicate, soil_type, soil_treatment, sensor_type) %>%
  dplyr::summarize(slope = summary(lm(R_norm ~ tDay + tDay^2))$coefficients[2])

slopesWindow$soil_type <- as.factor(slopesWindow$soil_type)
slopesWindow$soil_treatment <- as.factor(slopesWindow$soil_treatment)
slopesWindow$sensor_type <- as.factor(slopesWindow$sensor_type)

slopesWindow$full_treatment <- paste(slopesWindow$soil_type,
                                     slopesWindow$soil_treatment,
                                     slopesWindow$sensor_type)

####Check to see if slopes are normally distributed####
normTest <- slopesWindow %>% group_by(full_treatment) %>%
  summarise(shap = tidy(shapiro.test(slope))) %>% unnest(shap)

slopesWindow$treatment <- paste(slopesWindow$soil_type,
                                slopesWindow$sensor_type,
                                slopesWindow$soil_treatment)

####Plot the slopes as box plots####
boxPlot <- ggplot(data=subset(slopesWindow, soil_type != "BLANK")) + 
  theme_bw() +
  geom_boxplot(aes(x=treatment, y=slope, fill=treatment),
               outlier.shape=NA, show.legend = FALSE) +
  geom_point(aes(x=treatment, y=slope, fill=treatment),
             position = position_jitterdodge(), alpha = 0.3, size=2) +
  scale_fill_manual(name="Treatment", values = typePalette) +
  xlab("") + ylab(bquote('Sensor Slope '(day^-1))) +
  scale_x_discrete(labels = label_wrap(8)) +
  theme(legend.position = "none")

show(boxPlot)

#Combine the three plots
basePlot <- ggarrange(bottomPlot + xlim(rEnd, as.POSIXct("2025-05-05")) +
                        ggtitle("Garden PHBV Live"), 
                      boxPlot, nrow=1, ncol=2,widths=c(4,6),
                      align="hv")

finalPlot <- ggarrange(topPlot, basePlot, nrow=2, ncol=1)

show(finalPlot)

wd_figures()
ggsave(plot=finalPlot, filename="finalPlot.png", height=8,width=12,
       dpi=150, bg="white")