# Load libraries
library(curl)
## Using libcurl 7.76.1 with OpenSSL/1.1.1l-fips
library(RJSONIO)
library(readr)
##
## Attaching package: 'readr'
## The following object is masked from 'package:curl':
##
## parse_date
library(openair)
library(ggplot2)
library(parallel)
library(doParallel)
## Loading required package: foreach
## Loading required package: iterators
library(pspline)
The point of this is to explore the 2021 HauHau Ventilation experiment
# Get list of devices from Google Drive
aa <- curl_fetch_memory("https://docs.google.com/spreadsheets/d/e/2PACX-1vSNExRg6Q44L6m0Zet55Bh8lFVWxmCLtMF_csLP-zr4hY0l-fLth9cqN8EasIwA8L5De_1S8kInFwuW/pub?output=csv")
hauhau_devices <- read.csv(textConnection(rawToChar(aa$content)))
names(hauhau_devices) <- c("name", "devID", "type", "location", "user", "guestflag")
devices <- hauhau_devices$devID
# Let's focus on one device
device <- subset(hauhau_devices, name == "HAUHAU-240")
# Fetch data
# HauHau API stuff
hhbase_url <- "http://stats.iaqsensors.com/hauhau/api/requestDeviceData?"
hhapiKey <- "0652896a-cf03-4a4f-8487-ae3ef8634bb8"
# ==================================
# Make sure that x_now is in LOCAL TIME (NZDT for the current experiment)
x_now <- Sys.time()
# The beginning of this summary ... 14 hours in the past
x_start <- x_now - 8 * 3600
#x_start <- as.POSIXct("2021-11-30 08:00:00")
#x_now <- as.POSIXct("2021-11-30 16:00:00")
# Turn it to the format that the API wants
startDate <- strftime(x_start, format = "%Y-%m-%d%%20%H:%M:%S")
endDate <- strftime(x_now, format = "%Y-%m-%d%%20%H:%M:%S")
hhbuilt_url <- paste0(hhbase_url,
"apiKey=",hhapiKey,"&",
"devID=",device$devID,"&",
"startDate=",startDate,"&",
"endDate=",endDate)
req1 <- curl_fetch_memory(hhbuilt_url)
reqOK <- req1$status_code==200
if (!reqOK){
print("Request error")
}
#Check we've got some data
server_response <- fromJSON(rawToChar(req1$content))
data_available <- server_response$status==1
# There is data in sensorlog so parse it
jreq1 <- server_response$sensorlog
npoints <- length(jreq1)
dataHH.raw <- data.frame(id = (1:npoints))
dataHH.raw$date <- as.POSIXct(jreq1[[1]][[1]])
dataHH.raw$Temperature <- NA
dataHH.raw$Humidity <- NA
dataHH.raw$CO2 <- NA
dataHH.raw$PM2.5 <- NA
dataHH.raw$sensorName <- device$name
for (i in (1:npoints)){
dataHH.raw$date[i] <- as.POSIXct(jreq1[[i]][[1]])
dataHH.raw$Temperature[i] <- as.numeric(jreq1[[i]][[2]])
dataHH.raw$Humidity[i] <- as.numeric(jreq1[[i]][[3]])
dataHH.raw$CO2[i] <- as.numeric(jreq1[[i]][[4]])
dataHH.raw$PM2.5[i] <- as.numeric(jreq1[[i]][[5]])
}
dataHH.raw$datenum <- as.numeric(dataHH.raw$date)
dataHH <- timeAverage(dataHH.raw, avg.time = '15 sec', fill = TRUE)
## Detected data with Daylight Saving Time.
dataHH$sensorName <- dataHH.raw$sensorName[1]
names(dataHH)
## [1] "date" "id" "Temperature" "Humidity" "CO2"
## [6] "PM2.5" "datenum" "sensorName"
#What does it look like
ggplot(dataHH, aes(x=date, y = CO2)) +
geom_line(colour = "red")
Let’s see what a log transformation does
dataHH$logCO2 <- log(dataHH$CO2)
ggplot(dataHH, aes(x=date, y = logCO2)) +
geom_line(colour = "red")
Let’s try to find some exponential decays in the first couple of days of data
# Walk the timeseries with a X minutes window and try to fit an exponential decay.
nobs <- length(dataHH$date)
obs_span <- 20 # Number of observations that make X minutes
#setup parallel backend to use many processors
cores <- detectCores()
cl <- makeCluster(cores - 2) #not to overload your computer
registerDoParallel(cl)
r2_array <- foreach(i=1:(nobs-obs_span),
.errorhandling = 'remove',
.combine = rbind) %dopar%
{
out_tmp <- data.frame(id = i)
out_tmp$adj.r2 <- NA
#for (i in (1:(nobs-30))){
window_data <- dataHH[(i:(i+obs_span)),]
window_data$hours <- (window_data$datenum - min(window_data$datenum,na.rm = TRUE)) / 3600
# Fit linear model logCO2 ~ hours
curr_model <- lm(data = window_data,logCO2 ~ hours)
out_tmp$adj.r2 <- summary(curr_model)$adj.r.squared
out_tmp$exp_coeff <- curr_model$coefficients[2]
out_tmp$intercept <- exp(curr_model$coefficients[1])
out_tmp
}
stopCluster(cl)
# Flag "high R2"
r2_array$highR2 <- r2_array$adj.r2
r2_array$highR2[r2_array$adj.r2<=0.85] <- NA
ggplot(data = r2_array, aes(x = dataHH$date[id])) +
geom_point(aes(y = adj.r2), colour = 'black') +
geom_point(aes(y=highR2), colour = 'red') +
geom_line(aes(y = dataHH$CO2[id]/max(dataHH$CO2[id],na.rm = TRUE)), colour = 'blue') +
ylim(0,1)
Let’s try to fit the full model \(ln(\frac{C_{ss}-C_{in}}{C_{ss}-C(t)})= Ft\)
With \(C_{ss}=C_{out} + \frac{S}{FV}\)
# Define functions
# Constants for the fitting
CO2ppm2Kgm3 <- 0.0018
min_CO2 <- min(dataHH$CO2)
Volume <- 225
# Mass balance model
f_model <- function(f.BG,f.S,f.ACH,f.Vol,f.C,f.t){
# Parameters:
# f.BG = CO2 background concentration [Kg/m3]
# f.S = CO2 emission rate TOTAL [Kg/hr]
# f.ACH = Air Changes per Hour [1/hr]
# f.Vol = Classroom Volume [m3]
# f.C = Initial CO2 concentration [Kg/m3]
# f.t = Time since the start of the increase/decrease [hr]
ndata <- length(f.t)
start_time <- f.t[1]
C_start <- f.C[1] - f.BG
term1 <-(f.S/(f.ACH*f.Vol)) * (1 - exp(-1 * f.ACH*(f.t - start_time)))
term2 <- (C_start - f.BG) * exp(-1 * f.ACH*(f.t - start_time))
f.C <- f.BG + term1 + term2
f.C
}
# Model error function
f_optim <- function(params,C,t){
# params = c(CO2 Emission Rate, ACH)
# C = Array of CO2 concentrations [Kg/m3]
# t = Array of time [hr]
# Note BACK differences so first datapoint is not used
f_optim <- 0
npoints <- length(C)
model_full <- f_model(min(C,na.rm = TRUE),
params[1],
params[2],
Volume,
C[1],
t)
sum((model_full - C[2:npoints])^2)
}
# Running the fitting over the whole timeseries
n_R <- length(r2_array$id)
cl <- makeCluster(cores - 2) #not to overload your computer
registerDoParallel(cl)
model_coeffs <- foreach(i=1:(n_R),
.errorhandling = 'remove',
.combine = rbind) %dopar%
{
out_tmp <- data.frame(id = r2_array$id[i])
out_tmp$ACH <- NA
out_tmp$ER <- NA
out_tmp$error <- NA
try({
# Select this segment of data (30 minutes)
c_data <- dataHH[(r2_array$id[i]:(r2_array$id[i]+obs_span)),]
c_data$hours <- (c_data$datenum - min(c_data$datenum,na.rm = TRUE)) / 3600
# Optimisation
fit_result <- optim(c(1e-5,1),
f_optim,
C=c_data$CO2*CO2ppm2Kgm3,
t=c_data$hours)
out_tmp$ACH <- fit_result$par[2]
out_tmp$ER <- fit_result$par[1]
out_tmp$error <- fit_result$value
}, silent = TRUE)
out_tmp
}
stopCluster(cl)
Now plotting
ggplot()+
geom_point(aes(x = dataHH$date[model_coeffs$id],y = (model_coeffs$ER)))
## Warning: Removed 40 rows containing missing values (geom_point).
ggplot()+
geom_point(aes(x = dataHH$date[model_coeffs$id],y = model_coeffs$ACH/max(model_coeffs$ACH)), colour = "black") +
geom_point(aes(x = dataHH$date[model_coeffs$id],y = r2_array$adj.r2/max(r2_array$adj.r2)), colour = "red") +
geom_line(aes(x = dataHH$date,y = dataHH$CO2/max(dataHH$CO2)), colour = "blue")
## Warning: Removed 1668 rows containing missing values (geom_point).
## Warning: Removed 1668 rows containing missing values (geom_point).
## Warning: Removed 1793 row(s) containing missing values (geom_path).
ggplot()+
geom_point(aes(x = dataHH$date[model_coeffs$id],y = (model_coeffs$ACH)))
## Warning: Removed 40 rows containing missing values (geom_point).
Now let’s try “nls”
# The model function is the same "f_model"
# Running the fitting over the whole timeseries
n_R <- length(r2_array$id)
cl <- makeCluster(cores - 2) #not to overload your computer
registerDoParallel(cl)
model_coeffs <- foreach(i=1:(n_R),
.errorhandling = 'remove',
.combine = rbind) %dopar%
{
out_tmp <- data.frame(id = r2_array$id[i])
out_tmp$ACH <- NA
out_tmp$ER <- NA
out_tmp$error <- NA
try({
# Select this segment of data (30 minutes)
c_data <- dataHH[(r2_array$id[i]:(r2_array$id[i]+obs_span)),]
c_data$hours <- (c_data$datenum - min(c_data$datenum,na.rm = TRUE)) / 3600
c_data$CO2 <- c_data$CO2 * CO2ppm2Kgm3
# Optimisation
model.nls <- nls(data = c_data,
formula = (CO2-CO2[1])*CO2ppm2Kgm3 ~ f_model(min(CO2),
EmRate,
ACH,
Volume,
CO2[1],
hours),
start = list(EmRate = 0,
ACH = 0.1),
trace = TRUE,
algorithm = "port")
summary_model <- summary(model.nls)
out_tmp$ACH <- summary_model$coefficients[2,1]
out_tmp$ER <- summary_model$coefficients[1,1]
RSS.p <- sum(residuals(model.nls)^2) # Residual sum of squares
TSS <- sum((c_data$CO2 - mean(c_data$CO2))^2) # Total sum of squares
out_tmp$FitMeas <- 1 - RSS.p/TSS
}, silent = false)
out_tmp
}
stopCluster(cl)
Now plotting
ggplot()+
geom_point(aes(x = dataHH$date[model_coeffs$id],y = (model_coeffs$ER)))
ggplot()+
geom_point(aes(x = dataHH$date[model_coeffs$id],y = (model_coeffs$ACH)))
ggplot()+
geom_point(aes(x = dataHH$date[model_coeffs$id],y = (model_coeffs$FitMeas)))
## Warning: Removed 9 rows containing missing values (geom_point).
ggplot(data = subset(model_coeffs, FitMeas>0.85))+
geom_point(aes(x = dataHH$date[id],y = (ER)))
ggplot(data = subset(model_coeffs, FitMeas>0.85))+
geom_point(aes(x = dataHH$date[id],y = (ACH)))