This R Publication is intended to provide updates on current and ongoing data analysis to the researches involved in the soil decomposition sensor / microbial genetic analysis experiment. Using this approach provides transparency regarding the data analysis methods used and improves communication between researchers.
We have 3 soil types, Nederland, Betasso, and Garden. Each soil type has 3 replicates of sterile and live mesocosms.
In each mescocosm, we have installed a double-sided decomposition sensor. One side has a printed PHBV bioplastic substrate, and the other has a printed PMMA (acrylic) substrate.
We also have 3 replicates where we have installed sensors in empty mesocosms (air).
Sensor resistance values were monitored
by the BEEM Lab’s custom WiFi res8
dataloggers. Dataloggers uploaded readings to a Google Sheet every
30 minutes, alongside diagnostic data to monitor the incubator’s
temperature and relative humidity.
# Required libraries ------------------------------------------------------
require(googlesheets4); require(lubridate); require(reshape)
library(ggplot2); library(ggpubr);library(cowplot); library(RColorBrewer)
library(scales)
library(tidyverse); library(dplyr); library(rstatix)
#Use these functions to point to subfolders for file management
wd_top <- function(){setwd(dirname(rstudioapi::getActiveDocumentContext()$path))}
wd_figures <- function(){wd_top(); setwd(paste(getwd(),"/figures",sep=""))}
#You will need to associate this with a Google account to pull in GSheet Data
#Get the first logger's data to initialize the dataframe
sensorIDs <- c("601","602","603","604","605","606")
gData <- read_sheet("https://docs.google.com/spreadsheets/d/1GExCnPKsCB4zQ3B5TVzC9i2XAlUwAhJOvr8-tT9qSP8", sensorIDs[1])
#Initialize the dataframe
rawData <- data.frame(gData)
remainingSensors <- setdiff(sensorIDs, sensorIDs[1])
#Download the remaining loggers' data
for (theSensor in remainingSensors){
gData <- read_sheet("https://docs.google.com/spreadsheets/d/1GExCnPKsCB4zQ3B5TVzC9i2XAlUwAhJOvr8-tT9qSP8", theSensor)
newData <- data.frame(gData)
rawData <- rbind(rawData, newData)
}
rm(gData, newData, sensorIDs, remainingSensors,theSensor) #Clean up
# Pull in Experiment Lookup Table -----------------------------------------
lData <- read_sheet("https://docs.google.com/spreadsheets/d/1UIwdcrfeHqmyhnNnboS45xjTZGszJLoxgp2UoDGVagM")
lookupTable <- data.frame(lData); rm(lData)
#Rename columns
names(rawData) <- c("dateTime", "device", "battery", "temperature", "rh",
"ch1", "ch2", "ch3", "ch4","ch5", "ch6", "ch7", "ch8",
"errorCode", "unknown")
#Make the timestamps into POSIXct format to make manipulation easier
rawData$dateTime <- as.POSIXct(rawData$dateTime, tz=UTC)
#Convert to Mountain Time
rawData$dateTime <- rawData$dateTime
attr(rawData$dateTime, "tzone") <- "America/Denver"
#Subset to only include data after the experiment started
startTime <- as.POSIXct("2025-04-09 18:00:00 MDT")
rawData <- subset(rawData, dateTime >= startTime)
#Subset to only include data before the experiment ended
endTime <- as.POSIXct("2025-05-09 24:00:00 MDT")
rawData <- subset(rawData, dateTime <= endTime)
rawData <- select(rawData, -c("errorCode", "unknown")) #Remove unused columns
# Transform the Data ------------------------------------------------------
res8 <- melt(rawData, id = c("dateTime","device"), value.name="reading")
names(res8) <- c("dateTime","device","sensorKind","reading")
####Export the raw data to a csv file####
wd_top(); write.csv(res8, file='res8.csv')
# Import the res8 dataframe -----------------------------------------------
wd_top(); res8 <- read.csv('res8.csv') #Import .csv file
res8 <- select(res8, c('dateTime','device','sensorKind','reading'))
#Set variable types
res8$sensorKind <- as.factor(res8$sensorKind)
res8$device <- as.factor(res8$device)
res8$reading <- as.numeric(res8$reading)
res8$dateTime <- as.POSIXct(res8$dateTime)
####Split the data into two dataframes####
resSensors <- c("ch1","ch2","ch3","ch4","ch5","ch6","ch7","ch8")
diagnostics <- subset(res8, !(sensorKind %in% resSensors))
res8 <- subset(res8, sensorKind %in% resSensors)
names(res8) <- c("dateTime", "device", "sensor", "reading")
These are data collected on-board each res8 Datalogger.
tempPlot <- ggplot(data=subset(diagnostics,sensorKind=="temperature"),
aes(dateTime,reading,fill=device)) + theme_bw() +
geom_point(col="black",shape=21) + xlab("") + ylab("Temperature (C)") +
scale_fill_brewer(palette = "Set3") +
ggtitle("Logger Diagnostics") + theme(plot.title = element_text(hjust=0.5))
rhPlot <- ggplot(data=subset(diagnostics,sensorKind=="rh"),
aes(dateTime,reading,fill=device)) + theme_bw() +
scale_fill_brewer(palette = "Set3") +
geom_point(col="black",shape=21) +
xlab("") + ylab("Relative Humidity (%)")
battPlot <- ggplot(data=subset(diagnostics,sensorKind=="battery"),
aes(dateTime,reading,fill=device)) + theme_bw() +
scale_fill_brewer(palette = "Set3") +
ylim(3.2, 4.2) +
geom_point(col="black",shape=21) +
xlab("") + ylab("Battery Level (V)")
diagPlot <- ggarrange(tempPlot, rhPlot, battPlot, ncol=1, nrow=3, align = "hv",
common.legend=TRUE,legend="right")
rm(tempPlot, rhPlot, battPlot,resSensors) #Clean up the workspace
diagPlot
wd_figures()
ggsave(plot=diagPlot, filename="diagPlot.png",height=15,width=20,dpi=300,
bg="white")
res8 <- merge(res8,lookupTable,by=c("device","sensor"))
#Rename soil types
res8[res8$soil_type=="Ned",]$soil_type <- "Nederland"
res8[res8$soil_type=="Bat",]$soil_type <- "Betasso"
res8$replicate <- as.factor(res8$replicate)
res8$treatment <- as.factor(paste(res8$soil_type,res8$soil_treatment))
# Normalize Resistance Data -----------------------------------------------
#This shows how to do a calculation that is important for our resistive sensors:
#We will take any resistance readings that fall between the variables rStart and
#rEnd, and calculate the initial resistance for each sensor, then put those
#initial resistances back in the main res8 dataframe:
rStart <- min(res8$dateTime) + hours(12)
rEnd <- rStart + days(1)
#Get initialial resistances on a per-sensor basis
r0Data <- res8[res8$dateTime <= rEnd & res8$dateTime >= rStart,]
r0s <- r0Data %>% group_by(sensor) %>%
dplyr::summarize(Ro = mean(reading, na.rm=T))
res8 <- merge(res8, r0s, by="sensor") #Merge Ro values with the dataframe
res8$R_norm <- res8$reading/res8$Ro #Calculate the normalized resistances
rm(r0Data, r0s) #Clean up the workspace
#Define the color palette for soil types
soilPalette <- c("BLANK" = "grey", "Betasso" = "purple4", "Garden" = "darkgreen",
"Nederland" = "brown4")
####Show the raw data with the Ro window shown with dashed lines####
ggplot(data=res8, aes(x=dateTime, y=reading, fill=soil_type)) +
theme_bw() + geom_point(shape=21, col="black") +
scale_fill_manual(name="Soil Type", values=soilPalette) +
xlab("") + ylab("Sensor Resistance (\u03a9)") +
ggtitle("Raw Readings") + theme(plot.title = element_text(hjust=0.5)) +
geom_vline(xintercept = rStart,color="darkgreen", linetype="dashed") +
geom_vline(xintercept = rEnd,color="darkred", linetype="dashed") +
ylim(0,10000) +
facet_wrap(~sensor_type + soil_treatment)
# Plot Normalized Resistance ----------------------------------------------
ggplot(data=res8, aes(x=dateTime, y=R_norm, fill=soil_type)) +
theme_bw() +
geom_point(col="black",shape=21,alpha=0.75,size=2) +
xlab("") + ylab(expression("Normalized Resistance (R/R"[o]*")")) +
ggtitle("Normalized Readings") + theme(plot.title = element_text(hjust=0.5)) +
xlim(rEnd, max(res8$dateTime)) + ylim(0.25, 5) +
scale_fill_manual(name="Soil Type", values = soilPalette) +
ylim(0,12) +
facet_wrap(~sensor_type + soil_treatment)
#Output the data as a .csv file
write.csv(res8,file='res8.csv')
# Process average signals -------------------------------------------------
res8$treatment_type <- paste(res8$sensor_type, res8$soil_treatment)
aveData <- res8 %>%
mutate(dateTime = cut(dateTime, breaks='2 hour')) %>%
group_by(treatment_type, soil_type, dateTime) %>%
dplyr::summarize(stDev = sd(R_norm), R_norm = mean(R_norm, na.rm=T))
aveData$dateTime <- as.POSIXct(aveData$dateTime)
aveData$treatment_type <- as.factor(aveData$treatment_type)
aveData$soil_type <- as.factor(aveData$soil_type)
####Plot facet grid####
#Define the color palette for sensor types
soilPalette <- c("BLANK" = "black", "Betasso" = "purple4",
"Garden" = "darkgreen","Nederland" = "brown4")
treatmentPalette <- c("PHBV BLANK" = "grey", "PMMA BLANK" = "grey",
"PHBV Live" = "orange", "PHBV Sterile" = "lightblue",
"PMMA Live" = "red3", "PMMA Sterile" = "blue3")
####Plot average sensor signals####
aveData <- subset(aveData, treatment_type != "PHBV BLANK")
aveData <- subset(aveData, treatment_type != "PMMA BLANK")
avePlot <- ggplot() + theme_bw() +
geom_ribbon(data=aveData, aes(x=dateTime,
ymax=R_norm+stDev,ymin=R_norm-stDev,
col=soil_type,fill=treatment_type),alpha=0.25) +
geom_point(data=aveData,
aes(dateTime,R_norm,col=soil_type,fill=treatment_type),
size=2,show_guide = FALSE) +
ylab(expression("Normalized Resistance (R/R"[o]*")")) +
scale_color_manual(name="Soil Type", values = soilPalette) +
scale_fill_manual(name="Treatment Type", values = treatmentPalette) +
xlim(rStart, max(aveData$dateTime)) + xlab("") +
theme(legend.position = "none") +
facet_grid(rows=vars(soil_type),cols=vars(treatment_type)) +
ylim(0,4)
show(avePlot)
#Save the figure to OneDrive
wd_figures()
ggsave(plot=avePlot, filename="avePlot.png",height=7.5,width=10,dpi=150,
bg="white")
res8 <- subset(res8, treatment_type != "PHBV BLANK")
res8 <- subset(res8, treatment_type != "PMMA BLANK")
indPlot <- ggplot() + theme_bw() +
geom_point(data=res8,
aes(dateTime,R_norm,col=soil_type,fill=treatment_type),
size=.5,show_guide = FALSE) +
ylab(expression("Normalized Resistance (R/R"[o]*")")) +
scale_color_manual(name="Soil Type", values = soilPalette) +
scale_fill_manual(name="Treatment Type", values = treatmentPalette) +
xlim(rStart, max(aveData$dateTime)) + xlab("") +
theme(legend.position = "none") + ylim(0,10) +
facet_grid(rows=vars(soil_type),cols=vars(treatment_type))
show(indPlot)
#Save the figure to OneDrive
wd_figures()
ggsave(plot=indPlot, filename="indPlot.png",height=7.5,width=10,dpi=150,
bg="white")
aveData$treatment <- paste(aveData$soil_type, aveData$treatment_type)
#Define the color palette for treatment types
typePalette <- c("Betasso PHBV Live" = "purple4",
"Betasso PHBV Sterile" = "purple3",
"Betasso PMMA Live" = "purple2",
"Betasso PMMA Sterile" = "purple1",
"Garden PHBV Live" = "green4",
"Garden PHBV Sterile" = "green3",
"Garden PMMA Live" = "green2",
"Garden PMMA Sterile" = "green1",
"Nederland PHBV Live" = "brown4",
"Nederland PHBV Sterile" = "brown3",
"Nederland PMMA Live" = "brown2",
"Nederland PMMA Sterile" = "brown1")
topPlot <- ggplot() + theme_bw() +
geom_point(data=aveData,
aes(dateTime,R_norm,col=treatment,fill=treatment,
shape=treatment_type),size=1) +
ylab(expression("Normalized Resistance (R/R"[o]*")")) +
scale_color_manual(name="Treatment", values = typePalette) +
scale_fill_manual(name="Treatment", values = typePalette) +
guides(shape="none") +
xlim(rStart, max(aveData$dateTime)) + xlab("") +
theme(legend.position = "right") + ylim(0,2.5) +
facet_grid(rows=vars(soil_type))
bottomPlot <- ggplot() + theme_bw() +
geom_point(data=subset(aveData,treatment=="Garden PHBV Live"),
aes(dateTime,R_norm,col=treatment,fill=treatment),
size=2) +
ylab(expression("Normalized Resistance (R/R"[o]*")")) +
scale_color_manual(name="Treatment", values = typePalette) +
scale_fill_manual(name="Treatment", values = typePalette) +
xlim(rStart, max(aveData$dateTime)) + xlab("") +
theme(legend.position = "none") + ylim(0,20)
bigPlot <- ggarrange(topPlot, bottomPlot, nrow=2, ncol=1,
align="hv")
show(bigPlot)
wd_figures()
ggsave(plot=bigPlot, filename="bigPlot.png", height=8,width=12,
dpi=150, bg="white")
#Get number of days since start of experiment
res8$tDay <- as.numeric(res8$dateTime - rEnd)/60/24
res8$window <- "NA"
#Assign a window to get slopes within
res8[res8$dateTime >= rEnd &
res8$dateTime <= as.POSIXct("2025-05-01"),]$window <- "Long Window"
windowData <- subset(res8, window != "NA")
#Get individual sensor slopes
slopesWindow <- windowData %>%
group_by(replicate, soil_type, soil_treatment, sensor_type) %>%
dplyr::summarize(slope = summary(lm(R_norm ~ tDay + tDay^2))$coefficients[2])
slopesWindow$soil_type <- as.factor(slopesWindow$soil_type)
slopesWindow$soil_treatment <- as.factor(slopesWindow$soil_treatment)
slopesWindow$sensor_type <- as.factor(slopesWindow$sensor_type)
slopesWindow$full_treatment <- paste(slopesWindow$soil_type,
slopesWindow$soil_treatment,
slopesWindow$sensor_type)
####Check to see if slopes are normally distributed####
normTest <- slopesWindow %>% group_by(full_treatment) %>%
summarise(shap = tidy(shapiro.test(slope))) %>% unnest(shap)
slopesWindow$treatment <- paste(slopesWindow$soil_type,
slopesWindow$sensor_type,
slopesWindow$soil_treatment)
####Plot the slopes as box plots####
boxPlot <- ggplot(data=subset(slopesWindow, soil_type != "BLANK")) +
theme_bw() +
geom_boxplot(aes(x=treatment, y=slope, fill=treatment),
outlier.shape=NA, show.legend = FALSE) +
geom_point(aes(x=treatment, y=slope, fill=treatment),
position = position_jitterdodge(), alpha = 0.3, size=2) +
scale_fill_manual(name="Treatment", values = typePalette) +
xlab("") + ylab(bquote('Sensor Slope '(day^-1))) +
scale_x_discrete(labels = label_wrap(8)) +
theme(legend.position = "none")
show(boxPlot)
#Combine the three plots
basePlot <- ggarrange(bottomPlot + xlim(rEnd, as.POSIXct("2025-05-05")) +
ggtitle("Garden PHBV Live"),
boxPlot, nrow=1, ncol=2,widths=c(4,6),
align="hv")
finalPlot <- ggarrange(topPlot, basePlot, nrow=2, ncol=1)
show(finalPlot)
wd_figures()
ggsave(plot=finalPlot, filename="finalPlot.png", height=8,width=12,
dpi=150, bg="white")