Polytunnel Experiment - Experimental and Data Analysis Documentation

This R Publication is intended to provide updates on current and ongoing data analysis to the researches involved in the UK Polytunnel Decomposition Sensors Experiment in Lancaster. Using this approach provides transparency regarding the data analysis methods used, and improves communication between researchers. You can see more photographs of the deployment here.

We have (6) replicates in each of (6) total treatments: Species Rich Control, watered roughly once per week. Wheat Control, watered roughly once per week. Species Rich Flood -> converted to Species Rich Control conditions Wheat Flood -> converted to Wheat Control conditions Species Rich Drought -> converted to Species Rich Control conditions Wheat Drought -> converted to Wheat Control conditions

We have also installed 5 experimental multiplexed (MUX) loggers which record data from independent traces on the decomposition sensors. These were installed primarily in Drought pots.

Data Analysis, using the R programming language:

Required packages

# Load Libraries ----------------------------------------------------------
library(tidyverse); library(dplyr); library(lubridate);  library(scales)
library(ggplot2); library(cowplot); library(RColorBrewer); 
library(reshape2);library(DescTools); library(ggpubr);
library(rstatix) #For t-tests
library(effsize) #For effect size calculations
library(plotrix) #For standard error calculations

Import data from standard parallelized sensor loggers

# Import and Process Text Files -------------------------------------------
setwd(dirname(rstudioapi::getActiveDocumentContext()$path))
exPlots <- FALSE #Determines whether plots are saved

####IMPORT STANDARD LOGGER DATA####
#Pull data in from text files and format timestamps
setwd(paste(dirname(rstudioapi::getActiveDocumentContext()$path),
            "/6-Channel Loggers/6.18.24",sep=""))

allLoggers <- c("Device 1", "Device 2", "Device 4", "Device 6",
                "Device 8", "Device 9")

allData <- data.frame(V1=0, V2=0, V3=0, V4=0, V5=0,V6=0,V7=0,V8=0,V9=0)

#Process all separate text files into one dataframe
for(logger in allLoggers){
  fileName <- paste(logger,".txt",sep="")
  newData <- read.table(fileName,sep=',', header=FALSE)
  allData <- rbind(allData, newData)
}

allData <- allData[-1,]; rm(newData, fileName, logger) #Delete initializing row

#Add header names
newNames = c("deviceID", "dateTime", "temp", "sen1", "sen2", "sen3",
             "sen4", "sen5", "sen6")
names(allData) <- newNames; rm(newNames)

#Subset to certain time range
allData$dateTime <- as.POSIXct(allData$dateTime, format = " %m/%d/%y %H:%M:%S")

#The date that the treatments were reversed
changeDate <- as.POSIXct("2024-05-13 12:00:00")

#R_o will be the mean resistance value between these dateTimes:
rStart <- as.POSIXct("2024-04-28 00:00:00"); rEnd <- rStart + hours(12)

#Get a device number for each row
allData$deviceNum <- as.numeric(gsub("\\D","",allData$deviceID))

####Break Sensors into individual DF Rows####
#Initialize large storage dataframe
bigDF <- data.frame(dateTime=as.POSIXct(0, origin = "1960-01-01"), devID=0, 
                    senID=0, temp=0, reading=0, Ro=0)

#Brute force splitting sensor readings into separate rows
for(device in c(1,2,4,6,8,9)){
  tempDF <- subset(allData, allData$deviceNum==device) #One logger at a time
  rDF <- na.omit(tempDF[tempDF$dateTime >= rStart & tempDF$dateTime <= rEnd,])
  
  tempDF1 <- data.frame(dateTime=tempDF$dateTime, devID = tempDF$deviceNum,
                        senID = 1,temp=tempDF$temp,reading=tempDF$sen1,
                        Ro=mean(rDF$sen1))
  tempDF2 <- data.frame(dateTime=tempDF$dateTime, devID = tempDF$deviceNum,
                        senID = 2,temp=tempDF$temp,reading=tempDF$sen2,
                        Ro=mean(rDF$sen2))
  tempDF3 <- data.frame(dateTime=tempDF$dateTime, devID = tempDF$deviceNum,
                        senID = 3,temp=tempDF$temp,reading=tempDF$sen3,
                        Ro=mean(rDF$sen3))
  tempDF4 <- data.frame(dateTime=tempDF$dateTime, devID = tempDF$deviceNum,
                        senID = 4,temp=tempDF$temp,reading=tempDF$sen4,
                        Ro=mean(rDF$sen4))
  tempDF5 <- data.frame(dateTime=tempDF$dateTime, devID = tempDF$deviceNum,
                        senID = 5,temp=tempDF$temp,reading=tempDF$sen5,
                        Ro=mean(rDF$sen5))
  tempDF6 <- data.frame(dateTime=tempDF$dateTime, devID = tempDF$deviceNum,
                        senID = 6,temp=tempDF$temp,reading=tempDF$sen6,
                        Ro=mean(rDF$sen6))
  #Combine all the DFs    
  bigDF <- rbind(bigDF, tempDF1, tempDF2, tempDF3, tempDF4, tempDF5, tempDF6)
  rm(tempDF1, tempDF2, tempDF3, tempDF4, tempDF5, tempDF6) #Delete temp DFs
}

bigDF<-bigDF[-1,]; rm(rDF) #Delete the top initializing row
bigDF$sensor <- as.factor(paste("D",bigDF$devID,"-",bigDF$senID,sep=""))

allData <- bigDF #Finally, replace allData with the big dataframe
allData <- na.omit(allData)
rm(bigDF, tempDF, device) #Clean up workspace

Data Processing for Parallelized Sensor Loggers

####Fix Device 8 Lost Timestamps (two RTC failures)####
d8Data <- subset(allData, devID==8)

offData <- subset(d8Data, dateTime <= as.POSIXct("2022-01-01"))
d8Data <- subset(d8Data, dateTime > as.POSIXct("2022-01-01")) #Temporary
#Get changepoint indices
row.names(offData) <- array(1:nrow(offData)) #Replace row names
cps <- offData[offData$dateTime < lag(offData$dateTime),]; cps <- cps[-1,]
cps <- rbind(offData[1,],cps)

cps$ind <- order(as.numeric(row.names(cps)))

#Run a loop to apply two different offsets to lost-timestamp data
#offset1 <- years(24) + months(4) +  days(12) + hours(12) + minutes(15)
offset1 <- years(24) + months(4) + days(12) + hours(12) + minutes(15)
offset2 <- offset1 + days(9) + hours(8)

#Alternate applying offsets to fix timestamp issues
for(index in 1:11){
 if(IsOdd(index)){offData[as.numeric(row.names(cps[cps$ind==index,])):
             as.numeric(row.names(cps[cps$ind==index+1,])),]$dateTime <-
   offData[as.numeric(row.names(cps[cps$ind==index,])):
             as.numeric(row.names(cps[cps$ind==index+1,])),]$dateTime + offset1
 }
 else{offData[as.numeric(row.names(cps[cps$ind==index,])):
             as.numeric(row.names(cps[cps$ind==index+1,])),]$dateTime <-
   offData[as.numeric(row.names(cps[cps$ind==index,])):
             as.numeric(row.names(cps[cps$ind==index+1,])),]$dateTime + offset2
 }
}

offData[offData$dateTime <= as.POSIXct("2022-01-01"),]$dateTime <-
  offData[offData$dateTime <= as.POSIXct("2022-01-01"),]$dateTime + offset2

offData <- offData[offData$dateTime <= as.POSIXct("2030-01-01"),]

#Recombine with larger dataframe
d8Data <- rbind(d8Data, offData)
allData <- subset(allData, devID!=8); allData <- rbind(allData, d8Data)
rm(d8Data)

####Fix weird garbage data from Device 9####
allData[allData$devID=="9" & allData$dateTime <= changeDate+minutes(20) &
          allData$dateTime >= changeDate+minutes(15),] <- NA
allData <- na.omit(allData)

#Subset to specific date range
startDate <- as.POSIXct("2024-04-27 00:00:00"); endDate <- max(allData$dateTime)
allData <- allData[allData$dateTime >= startDate,]

####Normalize Resistances for all sensors####
allData <- allData[allData$reading <= 10000,] #Clean up bad data
allData$Rnorm <- allData$reading/allData$Ro

Data Processing

####Map treatments to sensor ports####
setwd(dirname(rstudioapi::getActiveDocumentContext()$path))
lookupTable <- read.csv('lookupTable.csv')
treatmentLookup <- data.frame(sensor=lookupTable$deviceID,
                              treatment=lookupTable$Treatment)
allData <- merge(treatmentLookup, allData, by="sensor")

#MUX data are not included due to differences in the logger electronics#
muxLookup <- data.frame(devID=lookupTable$MUX,treatment=lookupTable$Treatment)
muxData <- merge(muxData, muxLookup, by="devID")
muxLookup <- data.frame(sensor=lookupTable$MUX,treatment=lookupTable$Treatment)
parMux <- merge(muxLookup, parMux, by="sensor")

####Parallelize the MUX channels data (currently unused)####
# par_function <- function(x){
#   oneOver <- sum(1/x); calc <- 1/oneOver
#   return(calc)}
# 
# parData <- muxData %>% group_by(dateTime, devID) %>%
#   dplyr::reframe(parRes = par_function(reading))

Some more data processing

####Combine MUX with Parallelized data if desired####
#moreData <- rbind(allData, parMux) #Combine MUX and Parallelized Data
#allData <- moreData #Chose whether to include MUX data

####Average all data, sensor-wise, by some timestep####
aveData <- allData %>%
  mutate(dateTime = cut(dateTime, breaks="24 hour")) %>% 
  group_by(devID, senID, sensor, treatment, dateTime) %>%
  dplyr::summarize(Rnorm = mean(Rnorm, na.rm=T))

aveData$dateTime <- as.POSIXct(aveData$dateTime)

Define the plotting colors and shapes that will be used throughout

####Set colors and shapes for treatments####
treatmentColors <- c("Species rich Control" = "darkgreen", "Wheat Control" = "lightgreen",
                     "Species rich Flood" = "darkblue", "Wheat Flood" = "lightblue",
                     "Species rich Drought" = "brown", "Wheat Drought" = "gold")

treatmentShapes <- c("Species rich Control" = 19, "Wheat Control" = 17,
                     "Species rich Flood" = 19, "Wheat Flood" = 17,
                     "Species rich Drought" = 19, "Wheat Drought" = 17)

All sensor data, color-coded by treatment and overlaid

Note that in all of the following plots, the date at which treatment pots were brought back towards control conditions (watering drought treatment pots, drying flood treatment pots) is denoted by a dashed red vertical line.

####Plot all RAW sensor outputs####
allData$treatment <- as.factor(allData$treatment)
rawPlot <- ggplot() +
  geom_point(data=allData, aes(dateTime,Rnorm,col=treatment), size=1) +
  geom_vline(xintercept=changeDate, color="red", linetype="dashed") +
  scale_color_manual(name = "Pot Treatment", values = treatmentColors) +
  xlab("") + ylab(expression("Normalized Resistance (R/R"[o]*")")) +
  xlim(rStart, endDate)  + theme_bw()

#Prepare plots with various ways of visualizing the data
facPlot <- rawPlot + facet_wrap(treatment ~ sensor) + ylim(0.55,5) +
  theme(legend.position = "none")
facPlotFree <- rawPlot + facet_wrap(treatment ~ sensor, scales="free_y") +
  theme(legend.position = "none")
facPlotTreat <- rawPlot + facet_grid(~ treatment) + ylim(0.55,5) +
  theme(legend.position = "none")
rawPlot <- rawPlot + ylim(0.55,5)

show(rawPlot)

All sensor signals, broken out by Sensor, with common y scales

All sensor signals, broken out by Sensor, with free y axis scales

All sensor signals, broken out by Treatment to show the variation within treatments

One surprising element here is the very strong response from the Wheat Flood pots. Ellen has an idea about why this might be the case, with these wheat plants possibly creating a good environment for anaerobic bacteria which can decompose carbon compounds.

Examine sensor signals when averaged by treatment

####Average sensor signals by treatment####
aveData <- allData %>% 
  mutate(dateTime = cut(dateTime, breaks='24 hour')) %>% group_by(treatment, dateTime) %>%
  dplyr::summarize(stDev = sd(Rnorm), Rnorm = mean(Rnorm, na.rm=T))

aveData$dateTime <- as.POSIXct(aveData$dateTime)

####Plot averaged sensor signals by treatment####
treatmentPlot <- ggplot() +
  geom_point(data=aveData,
             aes(dateTime,Rnorm,col=treatment,fill=treatment,shape=treatment), 
             size=2,show_guide = FALSE) +
  #stat_smooth(data=subset(aveData, dateTime < changeDate),
  #  aes(dateTime,Rnorm,col=treatment), method="lm", se=FALSE, 
  #            linewidth = 0.5,formula = y ~ poly(x,2)) +
  #stat_smooth(data=subset(aveData, dateTime > changeDate),
  #            aes(dateTime,Rnorm,col=treatment), method="lm", se=FALSE, 
  #            linewidth = 0.5,formula = y ~ poly(x,2)) +
  geom_vline(xintercept=changeDate, color="red",linetype="dashed",linewidth=1) +
  
  xlab("") + ylab(expression("Normalized Resistance (R/R"[o]*")")) +
  scale_color_manual(name = "Pot Treatment", values = treatmentColors) +
  scale_shape_manual(values = treatmentShapes, guide="none") +
  xlim(rStart, endDate) + theme_bw() + ylim(0.85,7.5) +
  theme(legend.position="bottom")

zoomTreatment <- treatmentPlot + ylim(0.85,2.5) + theme(legend.position="none")

All sensor signals, averaged by treatment, at two different y scales:

One strange behavior seen here is the sudden change in the trajectory the normalized resistance of sensors installed in the Species-Rich Control pots. This is clearly visible in the lower plot, which zooms in on smaller sensor signals.

We have sometimes seen a decreasing normalized resistance in very dry and non-microbially-active soils, due to annealing of the polymer. In general, we don’t expect to see a negative slope except in these extremely dry and non-active conditions (although it has been mentioned that all of the pots have been getting quite dry in the very hot environment in the polytunnel).

Examine variability in signals

Let’s take a look at the sensor signals by treatment type, including an error cloud to represent the standard deviation within each sensor signal:

#Add an error cloud and break out by treatment
devPlot1 <- treatmentPlot + 
  geom_ribbon(data=aveData, aes(x=dateTime,
                                ymax=Rnorm+stDev,ymin=Rnorm-stDev,
                                col=treatment,fill=treatment),alpha=0.25) +
  scale_fill_manual(name = "Pot Treatment", values = treatmentColors, guide="none") +
  facet_wrap(~treatment) + theme(legend.position="none") + 
  ylim(-.75,11.5)

show(devPlot1)

#Add a watering regime variable
aveData$watering <- "Control"
aveData[aveData$treatment=="Species rich Drought",]$watering <- "Drought"
aveData[aveData$treatment=="Wheat Drought",]$watering <- "Drought"
aveData[aveData$treatment=="Species rich Flood",]$watering <- "Flood"
aveData[aveData$treatment=="Wheat Flood",]$watering <- "Flood"

#Add an error cloud and break out by watering regime
devPlot2 <- ggplot() + theme_bw() +
  geom_ribbon(data=aveData, aes(x=dateTime,
                                ymax=Rnorm+stDev,ymin=Rnorm-stDev,
                                col=treatment,fill=treatment),
                                alpha=0.25,linewidth=1) +
  scale_fill_manual(name = "Pot Treatment", values = treatmentColors,
                    guide="none") +
  scale_color_manual(name = "Pot Treatment", values = treatmentColors,
                     guide="none") +
  facet_grid(rows=vars(watering)) + theme(legend.position="none") + 
  ylim(-.75,11.5) + xlab("") +
  geom_vline(xintercept=changeDate, color="red", linetype="dashed",
                                linewidth=1) +
  ylab(expression("Normalized Resistance (R/R"[o]*")"))

show(devPlot2)

ggsave(plot=devPlot1, filename="devPlot1.png",width=8,height=7,dpi=300)
ggsave(plot=devPlot2, filename="devPlot2.png",width=8,height=7,dpi=300)

Import and show other measurements taken during the experiment

####GET SOIL MOISTURE AND TEMPERATURE DATA####
setwd(dirname(dirname(rstudioapi::getActiveDocumentContext()$path)))
smTemp <- read.csv('Soil temp and moisture.csv',header=T)
smTemp$dateTime <- as.POSIXct(smTemp$Date, format="%d.%m.%y")
smTemp$treatment <- paste(smTemp$Composition, smTemp$Watering)

#Get average soil moisture and temperatures
aveSmTemp <- smTemp %>% group_by(dateTime, treatment) %>%
  dplyr::summarize(temp = mean(Temp, na.rm=T), tempSd = sd(Temp),
                   tempErr = std.error(Temp),
                   sm = mean(SMC, na.rm=T), smSd = sd(SMC),
                   smErr = std.error(SMC))

#Aesthetic parameters for error bars
barWidth <- 100000; barLineWidth <- 100000
pd <- position_dodge(width = 75000)

smPlot <- ggplot() + 
  geom_line(data=aveSmTemp, aes(dateTime,sm,col=treatment),
            size=0.75,alpha=0.5) +
  geom_errorbar(data=aveSmTemp,aes(x=dateTime,ymin=sm-smErr,ymax=sm+smErr,
                                   col=treatment),width=barWidth,
                                    position=pd,linewidth=0.75) +
   geom_vline(xintercept=changeDate, color="red", linetype="dashed") +
   xlab("") + ylab(paste("Volumetric Water", "\n", "Content (%)")) +
   scale_color_manual(name = "Pot Treatment", values = treatmentColors) +
   scale_shape_manual(values = treatmentShapes, guide="none") +
   xlim(rStart, endDate) + theme_bw() + theme(legend.position="bottom", 
                                              legend.title=element_blank())

stPlot <- ggplot() + 
  geom_line(data=aveSmTemp,aes(dateTime,temp,col=treatment),
            size=0.75,alpha=0.5) +
  geom_errorbar(data=aveSmTemp,aes(x=dateTime,ymin=temp-tempErr,
                                   ymax=temp+tempErr,
                                   col=treatment),width=barWidth,
                                    position=pd,linewidth=0.75) +
  geom_vline(xintercept=changeDate, color="red", linetype="dashed") +
  xlab("") + ylab(paste("Soil", "\n", "Temperature (C)")) +
  scale_color_manual(name = "Pot Treatment", values = treatmentColors) +
  scale_shape_manual(values = treatmentShapes, guide="none") +
  xlim(rStart, endDate) + theme_bw() + theme(legend.position="bottom", 
                                             legend.title=element_blank())

####GET UNADJUSTED CO2 FLUX####
setwd(paste(dirname(dirname(rstudioapi::getActiveDocumentContext()$path)),
            "/Gas flux",sep=""))
Gas <- read.delim("Gas flux final dataset.txt")
Gas$treatment <- paste(Gas$Composition,Gas$Watering)
co2DataRaw <- data.frame(Pot.number=Gas$Pot.number, treatment=Gas$treatment,
                      dateTime = as.POSIXct(Gas$Date, format="%d.%m.%y"),
                      co2_flux=Gas$mg.CO2.m.2.hr.1,
                      co2c_flux=Gas$mg.CO2.C.m.2.hr.1)

aveCO2 <- co2DataRaw %>% group_by(dateTime, treatment) %>%
  dplyr::summarize(co2 = mean(co2_flux, na.rm=T), co2Sd = sd(co2_flux),
                   co2Err = std.error(co2_flux),
                   co2c = mean(co2c_flux, na.rm=T), co2cSd = sd(co2c_flux))

co2Plot <- ggplot() +
  geom_line(data=aveCO2, aes(dateTime,co2,col=treatment),size=0.75,alpha=0.5) +
  geom_errorbar(data=aveCO2,aes(x=dateTime,ymin=co2-co2Err,ymax=co2+co2Err,
                                   col=treatment),width=barWidth,
                                   position=pd,linewidth=0.75) +
  geom_vline(xintercept=changeDate, color="red", linetype="dashed") +
  xlab("") + ylab(expression("CO"[2]*" Flux" ~ "(m"^3 ~ "/ hr)")) +
  scale_color_manual(name = "Pot Treatment", values = treatmentColors) +
  scale_shape_manual(values = treatmentShapes, guide="none") +
  xlim(rStart, endDate) + theme_bw() + theme(legend.position="bottom", 
                                             legend.title=element_blank())

####GET ADJUSTED CO2 FLUX####
co2adj <- read.delim("Respiraton predicted.txt")
co2adj$treatment <- paste(co2adj$Composition,co2adj$Watering)
names(co2adj) <- c("Pot.number","Composition","Watering","Replicate",
                   "Date","Flux","treatment")

co2DataAdj <- data.frame(Pot.number=co2adj$Pot.number, 
                         treatment=co2adj$treatment,
                      dateTime = as.POSIXct(co2adj$Date, format="%d.%m.%y"),
                      co2_flux=co2adj$Flux)

aveCO2adj <- co2DataAdj %>% group_by(dateTime, treatment) %>%
  dplyr::summarize(co2 = mean(co2_flux, na.rm=T), co2Sd = sd(co2_flux))

co2PlotAdj <- ggplot() +
  geom_line(data=aveCO2adj,
            aes(dateTime,co2,col=treatment),linewidth=1) +
  geom_errorbar(data=aveCO2,aes(x=dateTime, ymin=co2-co2Sd, ymax=co2+co2Sd,
                                col=treatment),width=100000,position=pd) +
  geom_vline(xintercept=changeDate, color="red", linetype="dashed") +
  xlab("") + ylab(expression("Adjusted CO"[2]*" Flux" ~ "\n" ~ "(m"^3 ~ "/hr)")) +
  scale_color_manual(name = "Pot Treatment", values = treatmentColors) +
  scale_shape_manual(values = treatmentShapes, guide="none") +
  xlim(rStart, endDate) + theme_bw() + theme(legend.position="bottom", 
                                             legend.title=element_blank())

####Look at Adjusted vs. Non-Adjusted CO2 Data####
co2CorrPlot <- ggplot() + theme_bw() +
  geom_point(aes(x=aveCO2$co2,y=aveCO2adj$co2)) +
  geom_line(aes(x=c(0,4000),y=c(0,4000)),linetype="dashed") +
  xlim(0,4000) + ylim(0,4000) +
  xlab("Unadjusted CO2 Flux") + ylab("Adjusted CO2 Flux")

####Get averaged logger temperature, 30min increments####
loggerTemp <- allData %>%
  mutate(dayTime = cut(dateTime, breaks="30 min")) %>% group_by(dayTime) %>%
  dplyr::summarize(Temp_mean = mean(temp, na.rm=T), Temp_sdev = sd(temp))
loggerTemp$dayTime <- as.POSIXct(loggerTemp$dayTime)
loggerTemp <- subset(loggerTemp, Temp_mean > 0) #Delete one bad datapoint

lTempPlot <- ggplot() + theme_bw() +
  geom_point(data=loggerTemp, aes(dayTime,Temp_mean),col="orange2",size=0.5,
             shape=21) + 
  geom_vline(xintercept=changeDate, color="red", linetype="dashed") +
  xlab("") + ylab(paste("Air", "\n", "Temperature (C)")) +
  xlim(rStart,endDate) + theme(legend.position="none")
  

####Combine into one plot####
tsPlot <- ggarrange(smPlot + theme(plot.margin=unit(c(0,1,0,1), 'cm')), 
                    stPlot + theme(plot.margin=unit(c(0,1,0,1), 'cm')), 
                    co2Plot + theme(plot.margin=unit(c(0,1,0,1), 'cm')),
                    lTempPlot + theme(plot.margin=unit(c(0,1,0,1), 'cm')),
                    ncol=1, nrow=4, align="hv", common.legend=TRUE,
                    labels=c("(a)","(b)","(c)","(d)"))

show(tsPlot)

ggsave(plot=tsPlot, filename="tsPlots.png",width=12,height=8, dpi=300,bg="white")
####Get the minimum and maximum temperatures for each day####
loggerTemp$day <- as.Date(loggerTemp$dayTime)

dailyTemps <- loggerTemp %>% group_by(day) %>%
  summarise(maxTemp = max(Temp_mean), minTemp = min(Temp_mean))

mean(dailyTemps$maxTemp); mean(dailyTemps$minTemp)
## [1] 37.29656
## [1] 10.73889
####Get the soil temperatures during treatment####
treatmentSoilTemp <- subset(aveSmTemp, dateTime < changeDate)

treatmentSoilTemps <- treatmentSoilTemp %>% group_by(treatment) %>%
  summarise(meanTemp = mean(temp))

aveControlTemp <- mean(27.32, 26.92)
aveDroughtTemp <- mean(29.54, 29.46)
aveFloodTemp <- mean(24.30,24.45)


#Clean up the workspace
rm(co2Plot, aveCO2, co2Data, Gas, stPlot, smPlot, pd, aveSmTemp,lTempPlot)

Show data with decomposition windows and their slopes

Here, we define a set of Decomposition Windows within which slopes should be taken:

#Get number of days since start, to calculate slopes as day^-1
allData$tDay <- as.numeric(allData$dateTime - rStart)/60/60/24

allData$window <- "NA"

#Assign some windows to get slopes within
allData[allData$dateTime >= as.POSIXct("2024-05-01") &
        allData$dateTime <= as.POSIXct("2024-05-13"),]$window <- "Treatment Window"

allData[allData$dateTime >= changeDate+days(3)  &
        allData$dateTime <= changeDate+days(35),]$window <- "Recovery Window"

#Subset data to only include the decomposition windows for the boxplot
windowData <- subset(allData, window != "NA")

#Generate window boxes to overlay
rects <- data.frame(xstart = as.POSIXct("2024-01-01"), 
                    xend = as.POSIXct("2024-01-01"), window = "NA")

for(theWindow in c("Treatment Window","Recovery Window")){
  newLine <- data.frame(xstart = min(windowData[windowData$window==theWindow,]$dateTime),
                        xend = max(windowData[windowData$window==theWindow,]$dateTime),
                        window = theWindow)
  rects <- rbind(rects,newLine)}
  
rects <- rects[-1,]; rects$window <- as.factor(rects$window)

####Overlay Windows on Treatment Plot####
windowPlot <- treatmentPlot + theme(legend.position="none") +
  geom_rect(data=rects,aes(xmin=rects$xstart, xmax=rects$xend, ymin=-Inf,
                           ymax=Inf,fill = window),show.legend = FALSE,
                            alpha=0.1) +
  scale_fill_brewer(palette = "Set2") +
  geom_label(aes(x=rects$xstart,y=5.5),label=rects$window, angle=90) + 
  ylab(expression("Mean Normalized Resistance (R/R"[o]*")")) +
  ylim(0.85, 7) +  theme(legend.position = "none") +
  xlim(as.POSIXct("2024-04-27 00:08:00"),as.POSIXct("2024-06-18 20:00:00"))

#Get individual sensor slopes                     
slopesWindow <- windowData %>% group_by(sensor, treatment, window) %>%
  dplyr::summarize(slope = summary(lm(Rnorm ~ tDay + tDay^2))$coefficients[2])

slopesWindow$window <- as.factor(slopesWindow$window)
slopesWindow$treatment <- as.factor(slopesWindow$treatment)
slopesWindow$slope <- as.numeric(slopesWindow$slope)

####Check to see if slopes are normally distributed####
normTest <- slopesWindow %>% group_by(treatment,window) %>%
  summarise(shap = tidy(shapiro.test(slope))) %>% unnest(shap)

####Show slope distributions across treatments####
#Reorder factors so that Treatment is shown before Recovery
slopesWindow$window <- factor(slopesWindow$window, levels=c("Treatment Window","Recovery Window"))

boxPlot <- ggplot(data=slopesWindow) + theme_bw() +
  geom_boxplot(aes(x=window, y=slope, col=window, fill=treatment),
               outlier.shape=NA, show.legend = FALSE) +
  geom_point(aes(x=window, y=slope, col=window, fill=treatment),
    position = position_jitterdodge(), alpha = 0.3, size=2) +
  scale_fill_manual(name = "Pot Treatment", values = treatmentColors) +
  scale_color_manual(name = "Experiment Phase", 
                     values = c("1" = "black", "2" = "black")) +
  xlab("") + ylab(bquote('Sensor Slope '(day^-1))) + 
  theme(legend.position = "none") + facet_grid(~treatment) +  ylim(-1,16) +
  scale_x_discrete(label = abbreviate) +
  stat_compare_means(method='anova', aes(x=window,y=slope,
                         label=paste0("p = ",..p.format..)))

####Combine the two plots####
slopesPlot <- ggarrange(windowPlot,boxPlot,nrow=2,ncol=1,align="hv",
                        labels=c("(a)","(c)"))

ggsave(plot=slopesPlot, filename="slopesPlot.png",width=8,height=8,dpi=300)

bigPlot <- ggarrange(slopesPlot,devPlot1,nrow=1,ncol=2,
                     labels=c("","(b)"))
bigPlot

ggsave(plot=bigPlot, filename="bigPlot.png",width=16,height=8,dpi=300)

Show the characteristics of the two experimental soils

Ellen collected Harvest Data at the end of the experiment. We’ll import that now, and map out the measured soil properties of the two soils:

#Move up one level in the file structure
setwd(dirname(rstudioapi::getActiveDocumentContext()$path)); setwd('..')
hData <- read.delim('Final harvest dataset.txt')
#Combine with the original lookup table
hData <- hData %>% rename(Pot = Pot.number)
hData <- select(hData, -c(Composition,Watering,Replicate))
resultsTable <- merge(lookupTable, hData, by="Pot")
resultsTable <- select(resultsTable, -c(Replicate,
                                      Device, Port, Installs, Sensor))
resultsTable <- resultsTable %>% rename(sensor = deviceID, treatment=Treatment)

####Add the slopes to the results table####
resultsTable <- merge(resultsTable,subset(slopesWindow,window=="Treatment Window"),
                      by = c("sensor"))
resultsTable <- select(resultsTable, -c("treatment.y"))

####Rename all of the columns in the Results Table####
names(resultsTable) <- c("sensor","Pot","Composition","Watering","Treatment","R_pre",
                         "R_ins","MUX", "SMC (%)", "pH", 
                         "AGB (g)","BGB (g)","CBH (nmol/g/hr)",
                         "GLC (nmol/g/hr)","NAG (nmol/g/hr)","PHO (nmol/g/hr)",
                         "XYL (nmol/g/hr)","PER (nmol/g/hr)","POX (nmol/g/hr)",
                         "Soil Carbon (%)","Soil Nitrogen (%)","C-N Ratio",
                         "MBC (mg/kg)","MBN (mg/kg)","X",
                         "CBH_norm","GLC_norm","NAG_norm","PHO_norm","XYL_norm",
                         "PER_norm","POX_norm","window","slope")

#Remove columns we don't care about
boxData <- select(resultsTable, -c("sensor","Pot","Treatment","R_pre",
                                   "R_ins","MUX","X","window","slope",
                                   "CBH_norm","GLC_norm","NAG_norm","PHO_norm",
                                    "XYL_norm","PER_norm","POX_norm"))

#Rename the soils from 'Composition'
boxData[boxData$Composition=="Species rich",]$Composition <- "Species Rich Soil"
boxData[boxData$Composition=="Wheat",]$Composition <- "Winter Wheat Soil"

#Fix the Soil Carbon units issue
boxData$`Soil Carbon (%)` <- boxData$`Soil Carbon (%)`*100

#Melt the data
pointData <- melt(boxData, id=c("Composition","Watering"))
boxData <- melt(select(boxData,-c("Watering")),id=c("Composition"))

####Perform significance tests####
stat.test <- boxData %>% group_by(variable) %>% t_test(value ~ Composition) %>%
  add_significance()

#Restrict significance stars to 3 stars at max significance
stat.test$p.signif[stat.test$p.signif=="ns"] <- ""
stat.test$p.signif[stat.test$p.signif=="****"] <- "***"

# stat.test$sName <- paste(stat.test$variable, '\n', 
#                          'p = ',stat.test$p, stat.test$p.signif, sep="")

stat.test$sName <- paste(stat.test$variable, stat.test$p.signif, sep=" ")

####Replace names in boxData and pointData####
stat.test <- select(stat.test, c("variable", "sName"))
boxData <- merge(boxData, stat.test, by = "variable")
boxData$variable <- boxData$sName
boxData <- select(boxData, -c("sName"))

pointData <- merge(pointData, stat.test, by = "variable")
pointData$variable <- pointData$sName
pointData <- select(pointData, -c("sName"))

####Rearrange facet grid order####
origLevels <- unique(boxData$variable)
newLevels <- origLevels

newLevels <- c(origLevels[10], origLevels[13], origLevels[1], origLevels[2],
               origLevels[14], origLevels[15], origLevels[6], origLevels[7],
               origLevels[4], origLevels[16], origLevels[12], origLevels[5],
               origLevels[3], origLevels[8], origLevels[9], origLevels[11])

####Custom palette####
boxPalette <- c("Species Rich Soil" = "gray45",
                   "Winter Wheat Soil" = "lightgrey",
                   "Drought" = "red",
                  "Control" = "darkgreen",
                   "Flood" = "blue")

#Change order of Watering levels for display
pointData$Watering <- factor(pointData$Watering,
                             levels = c("Drought","Control","Flood"))

#Add labels to each facet
label_data <- data.frame(
  variable = factor(newLevels, levels = newLevels),  # Ensure factor levels match
  label = paste0("(", letters[1:length(newLevels)], ")"),
  x = .5,  # Assuming 1st x-category for all panels
  y = Inf  # Will use Inf to place it at the top, then adjust with vjust
)

####Box plots with Composition and Watering####
soilPlot <- ggplot() + 
  geom_boxplot(data=boxData, aes(x=Composition,y=value,fill=Composition),
               outlier.shape=NA, alpha = 0.5) +
  geom_point(data=pointData, aes(x=Composition,y=value,col=Watering),
             position = position_dodge(width = 0.5),
             alpha = 0.5, size=1) +
  scale_color_manual(name = "Initial Treatment:", values = boxPalette)+
  scale_fill_manual(name = "", values = boxPalette) +
  xlab("") + ylab("") + guides(fill = FALSE) + 
  theme_bw() + theme(legend.position="bottom", 
                     legend.text=element_text(size=11),
                     legend.title=element_text(face="bold")) + 
  guides(colour = guide_legend(override.aes = list(size=2))) +
  scale_x_discrete(labels = label_wrap(12)) +
  facet_wrap(~factor(variable,levels=newLevels), scales="free_y") +
  geom_text(data = label_data, aes(x = x, y = y, label = label),
    inherit.aes = FALSE, hjust = 0, vjust = 2,
    fontface = "bold",size=9/.pt)

soilPlot

ggsave(plot=soilPlot, filename="soilPlot2.png",width=8,height=7,dpi=300)

We can have a closer look by nesting box plots together to examine the recovery of each soil composition following each initial Watering treatment:

Look at relationships between CO2 flux and sensor slopes

First, we’ll take the dataset of CO2 flux rather than looking at the raw CO2 evolution data, and we’ll see if there is a relationship between individual daily sensor slope and the CO2 flux measured from that sensor’s pot:

#Clean up the workspace
rm(allMux, boxPlot, co2CorrPlot, co2PlotAdj, combPlot,cps, dailyMUX,
   loggerTemp,moreData,muxData,muxLookup,muxRo,newLine,offData,parMux,
   rects, slopesPlot, treatmentPlot, tsPlot, windowData,
   windowPlot, zoomTreatment,i,index,mux,offset1,offset2,pal_mux)

#Get the dates that CO2 flux readings were taken
co2Dates <- unique(co2DataAdj$dateTime)

#Get a dataframe for co2 readings, sensor ID and date
names(co2DataAdj) <- c("Pot", "Treatment", "dateTime", "co2_flux")
potLookup <- data.frame(Pot=lookupTable$Pot,sensor=lookupTable$deviceID)
co2 <- merge(co2DataAdj, potLookup, by="Pot")
co2 <- select(co2, -c("Pot","Treatment"))
names(co2) <- c("date","co2_flux","sensor")

#Get slopes for each sensor on each day of the experiment
allData$date <- as.Date(allData$dateTime)

daySlopes <- allData %>% group_by(sensor, treatment, date) %>%
  dplyr::summarize(slope = summary(lm(Rnorm ~ tDay + tDay^2))$coefficients[2])

#Combine CO2 and decomp sensor data by date
co2slopes <- merge(daySlopes, co2, by=c("date","sensor"))

#Plot it
co2corrPlot <- ggplot(data=co2slopes) + 
  theme_bw() +
  geom_point(aes(x=co2_flux,y=slope,col=treatment)) + 
  geom_smooth(aes(x=co2_flux,y=slope), method=lm, linewidth=0.5, col="black",
              se=FALSE) +
  ylab("Sensor Slope Across Whole Day") + xlab("CO2 Flux Measurement") +
  scale_color_manual(name = "Pot Treatment", values = treatmentColors) +
  #  stat_regline_equation(label.y.npc=1) +
    stat_cor(aes(x=co2_flux,y=slope, label = 
        paste(..rr.label.., ..p.label.., sep = "~`,`~")),label.y.npc=0.95) +
  facet_wrap(~ date, scales = "free_y") +
  theme(legend.position = "bottom")

show(co2corrPlot)

ggsave(plot=co2corrPlot, filename="co2_slope_corr.png",width=8,height=5,dpi=300)

There was no meaningful relationship here, which we don’t find surprising because part of the point of these decomposition sensors is that we believe they capture types of activity that are not captured by CO2. For example, the winter wheat gave a large sensor response when flooded despite the fact that in waterlogged conditions, we saw basically very low CO2 flux.

Let’s at least examine CO2 flux visually and see if it relates to the sensor decomposition slopes:

#Clean up
rm(aveCO2adj,co2,co2adj,co2corrPlot,co2DataAdj,co2DataRaw,co2slopes,
   devPlot2,hData,pointData,smTemp,soilPlot,stat.test,
   waterPlot, waterPlot2)

#Import the data combined from all of the CO2 monitoring activities
setwd(paste(dirname(dirname(rstudioapi::getActiveDocumentContext()$path)),
            "/Gas flux",sep=""))

co2 <- read.csv('raw_gas_readings.csv')
names(co2) <- c('date','time',c(1:36))

co2 <- melt(co2, id=c('date','time'), value.name="time")
names(co2) <- c('date','time','Pot','co2')

#Get the sensor
co2 <- merge(co2, potLookup, by="Pot")
co2$date <- as.POSIXct(co2$date, format="%m/%d/%Y")
co2$dateTime <- co2$date + seconds(co2$time)

#Get the treatment
co2 <- merge(co2, treatmentLookup, by="sensor")

#Let's look at all the data so far, based on pot treatment
ggplot(co2) + theme_bw() + xlab("") +
  geom_point(aes(x=time,y=co2,col=treatment)) +
  facet_wrap(~date) + ylab("CO2 Concentration (ppm)") +
  scale_color_manual(name = "Pot Treatment", values = treatmentColors) +
  theme(legend.position = "bottom")

#Get CO2 slopes based on date and corresponding pot/sensor/treatment
co2_slopes <- co2 %>% group_by(sensor, treatment, date) %>%
  dplyr::summarize(slope = summary(lm(co2 ~ time))$coefficients[2],
                   integral=round(pracma::trapz(time, co2),0))

ggplot(co2_slopes) + theme_bw() + xlab("") +
  geom_boxplot(aes(x=treatment,y=slope,col=treatment)) +
  facet_wrap(~date) + ylab('CO2 Integral (ppm s)') +
  scale_color_manual(name = "Pot Treatment", values = treatmentColors) +
  theme(legend.position = "bottom") +
  ggtitle("CO2 Accumulation Slopes")

#Let's aggregate data by pre- and post-intervention
co2_slopes$date <- as.POSIXct(co2_slopes$date)
co2_slopes$window <- 1
co2_slopes[co2_slopes$date > changeDate,]$window <- 2
co2_slopes$window <- as.factor(co2_slopes$window)
co2_slopes$date <- as.factor(co2_slopes$date)

#Let's look at the co2 accumulation slope distributions for the pots
#during their initial phase, before the treatments were modified:
ggplot(data=subset(co2_slopes,window=="1")) + theme_bw() +
  geom_boxplot(aes(x=window, y=slope, col=window, fill=treatment),
               outlier.shape=NA, show.legend = FALSE) +
  geom_point(aes(x=window, y=slope, col=window, fill=treatment),
    position = position_jitterdodge(), alpha = 0.3, size=2) +
  scale_fill_manual(name = "Pot Treatment", values = treatmentColors) +
  scale_color_manual(name = "Experiment Phase", 
                     values = c("1" = "black", "2" = "black")) +
  xlab("Experiment Phase") + ylab(bquote('CO2 Flux (ppm '(s^-1))) + 
  theme(legend.position = "none") + facet_grid(~treatment) +  ylim(0,3.5)

#Now let's look at the co2 accumulation distribution by decomposition window
windowSlopes <- ggplot(data=co2_slopes) + theme_bw() +
  geom_boxplot(aes(x=window, y=slope, col=window, fill=treatment),
               outlier.shape=NA, show.legend = FALSE) +
  geom_point(aes(x=window, y=slope, col=window, fill=treatment),
    position = position_jitterdodge(), alpha = 0.3, size=2) +
  scale_fill_manual(name = "Pot Treatment", values = treatmentColors) +
  scale_color_manual(name = "Experiment Phase", 
                     values = c("1" = "black", "2" = "black")) +
  xlab("Experiment Phase") + ylab(bquote('CO2 Flux (ppm '(s^-1))) + 
  theme(legend.position = "none",axis.text.x=element_blank()) + 
  facet_grid(~treatment) +  ylim(0,3.5)

windowInts <- ggplot(data=co2_slopes) + theme_bw() +
  geom_boxplot(aes(x=window, y=integral, col=window, fill=treatment),
               outlier.shape=NA, show.legend = FALSE) +
  geom_point(aes(x=window, y=integral, col=window, fill=treatment),
    position = position_jitterdodge(), alpha = 0.3, size=2) +
  scale_fill_manual(name = "Pot Treatment", values = treatmentColors) +
  scale_color_manual(name = "Experiment Phase", 
                     values = c("1" = "black", "2" = "black")) +
  xlab("Experiment Phase") + ylab("CO2 Slope (ppm / sec)") + 
  theme(legend.position = "none") + facet_grid(~treatment) +  ylim(20000,40000)

ggarrange(windowSlopes, windowInts, nrow=2, align="hv")

#We can also look at this by date to see the evolution more precisely
dailySlopes <- ggplot(data=co2_slopes) + theme_bw() +
  geom_boxplot(aes(x=date, y=slope, fill=treatment),
               outlier.shape=NA, show.legend = FALSE) +
  geom_point(aes(x=date, y=slope, col=window,fill=treatment),
    position = position_jitterdodge(), alpha = 0.3, size=2) +
  scale_fill_manual(name = "Pot Treatment", values = treatmentColors) +
  scale_color_manual(name = "Experiment Phase", 
                     values = c("1" = "black", "2" = "darkred")) +
  xlab("") + ylab(bquote('CO2 Flux (ppm '(s^-1))) + 
  theme(legend.position = "none",
        axis.text.x=element_blank()) + 
  facet_grid(~treatment) +  ylim(0,3.5) +
  ggtitle("Daily Slopes")

dailyInt <- ggplot(data=co2_slopes) + theme_bw() +
  geom_boxplot(aes(x=date, y=integral, fill=treatment),
               outlier.shape=NA, show.legend = FALSE) +
  geom_point(aes(x=date, y=integral, col=window,fill=treatment),
    position = position_jitterdodge(), alpha = 0.3, size=2) +
  scale_fill_manual(name = "Pot Treatment", values = treatmentColors) +
  scale_color_manual(name = "Experiment Phase", 
                     values = c("1" = "black", "2" = "darkred")) +
  xlab("") + ylab('CO2 Integral (ppm s)') + 
  theme(legend.position = "none",
        axis.text.x = element_text(angle=45,vjust=1,hjust=1)) + 
  facet_grid(~treatment) +  ylim(22500,40000) +
  ggtitle("Daily Integrals")

ggarrange(dailySlopes, dailyInt, nrow=2, align="hv")

So, it appears that there was no statistically-significant difference between the average CO2 flux values before and after the drought and flood treatments were applied. The pattern of CO2 flux across time also doesn’t appear to show obvious trends between pre-and post-treatment.

Effect Size calculations

A better way to represent the CO2 data might be to look at CO2 efflux effect sizes as a function of treatment and date, holding the control groups as effect size zero:

cohens <- co2_slopes %>% group_by(date) %>%
  cohens_d(slope ~ treatment)

ts <- co2_slopes %>% group_by(date) %>%
  t_test(slope ~ treatment)


es <- subset(cohens, group1 %in% c("Species rich Control","Wheat Control"))
es <- subset(es, group2 %in% c("Species rich Drought", "Species rich Flood",
                               "Wheat Drought", "Wheat Flood"))
es$treatment <- paste(es$group1,":",es$group2)

es <- subset(es, treatment %in% c("Species rich Control : Species rich Drought",
                                  "Species rich Control : Species rich Flood",
                                  "Wheat Control : Wheat Drought",
                                  "Wheat Control : Wheat Flood"))

es$date <- as.POSIXct(es$date)

#Let's plot the effect size over time for each soil type and treatment
effTime <- ggplot(es,aes(x=date,y=effsize,col=treatment)) + theme_bw() +
  geom_point() + geom_line() + xlab("") + ylab("Effect Size") +
  theme(legend.position = "bottom")

effTime