Eddy Covariance data available for download on the NWFP is presented in the European Fluxes Database format (http://www.europe-fluxdata.eu/home/guidelines/how-to-submit-data/variables-codes). This data should not be considered a ‘final’ dataset, and needs further QCing and cleaning by the user, according to their needs. The downloadable data contains information useful for QCing the data, including quality flags, friction velocity (u-star) and footprint location information. We have not removed or replaced any data from these files, so occurrence of low-quality data should be expected and filtered by the user. This document describes suggested steps and methodologies that can be taken to examine and QC the available data.
Depending on the options selected when downloading Eddy Covariance (EC) data from the NWFP portal, there may be separate columns for CO2, CH4, wind and meteorological data, and separate columns for each of the three available fields. The file I’ve downloaded here is available on the North Wyke Data Portal [https://nwfp.rothamsted.ac.uk/Download]. This contains all variables for Tower 4 (Burrows) under the ‘Greenhouse Gas Data’ tab. A maximum of one year of data can be downloaded at a time, this file contains all of 2020.
rm(list=ls())
#read the data into r
dat <- read.csv("W:/Documents/EC Advanced QC/greenhouse_2020-01-01_2021-01-01.csv",
stringsAsFactors = T, header = F, skip = 2)
header <- read.csv("W:/Documents/EC Advanced QC/greenhouse_2020-01-01_2021-01-01.csv",
stringsAsFactors = F, header = F, nrow = 1)
colnames(dat) <- header
colnames(dat) <- gsub("\\[|\\]", "", colnames(dat))
#convert datetime from character to POSIX
dat$Datetime <- as.POSIXct(as.character(format(dat$Datetime)),format = "%Y/%m/%d %H:%M:%S", tz = "UTC")
#see what columns there are
colnames(dat)
## [1] "Datetime" "CO2_1_1_1 Tower 4"
## [3] "H2O_1_1_1 Tower 4" "CH4_1_1_1 Tower 4"
## [5] "TAU_1_1_1 Tower 4" "TAU_SSITC_TEST_1_1_1 Tower 4"
## [7] "H_1_1_1 Tower 4" "H_SSITC_TEST_1_1_1 Tower 4"
## [9] "LE_1_1_1 Tower 4" "LE_SSITC_TEST_1_1_1 Tower 4"
## [11] "FC_1_1_1 Tower 4" "FC_SSITC_TEST_1_1_1 Tower 4"
## [13] "FCH4_1_1_1 Tower 4" "FCH4_SSITC_TEST_1_1_1 Tower 4"
## [15] "WD_0_0_1 Tower 4" "WS_0_0_1 Tower 4"
## [17] "WS_MAX_0_0_1 Tower 4" "U_SIGMA_0_0_1 Tower 4"
## [19] "V_SIGMA_0_0_1 Tower 4" "W_SIGMA_0_0_1 Tower 4"
## [21] "USTAR_0_0_1 Tower 4" "MO_LENGTH_0_0_1 Tower 4"
## [23] "ZL_0_0_1 Tower 4" "FETCH_MAX_0_0_1 Tower 4"
## [25] "FETCH_70_0_0_1 Tower 4" "FETCH_80_0_0_1 Tower 4"
## [27] "FETCH_90_0_0_1 Tower 4" "PA_0_0_1 Tower 4"
## [29] "RH_0_0_1 Tower 4" "TA_0_0_1 Tower 4"
## [31] "VPD_0_0_1 Tower 4" "T_SONIC_0_0_1 Tower 4"
## [33] "T_SONIC_SIGMA_0_0_1 Tower 4" "LWIN_1_1_1 Tower 4"
## [35] "LWOUT_1_1_1 Tower 4" "PPFD_1_1_1 Tower 4"
## [37] "RH_1_1_1 Tower 4" "RN_1_1_1 Tower 4"
## [39] "SHF_1_1_1 Tower 4" "SHF_2_1_1 Tower 4"
## [41] "SHF_3_1_1 Tower 4" "SWC_1_1_1 Tower 4"
## [43] "SWC_2_1_1 Tower 4" "SWC_3_1_1 Tower 4"
## [45] "SWIN_1_1_1 Tower 4" "SWOUT_1_1_1 Tower 4"
## [47] "TA_1_1_1 Tower 4" "TS_1_1_1 Tower 4"
## [49] "TS_2_1_1 Tower 4" "TS_3_1_1 Tower 4"
Select the field, if your dataframe has more than one included. Tidy up column names.
#some packages
library(plyr)
library(dplyr)
library(tidyverse)
#select the Datetime column and all Tower 4 () columns in the data frame
dat <- dat %>% dplyr::select(contains("Datetime"),
contains("Tower 4"))
#clean up column names
colnames(dat) <- gsub(" Tower 4", "", colnames(dat))
An initial check to see how much data is missing can be helpful. Here, we can see many of the soil measurements are not present.
#package for examining missing values
library(naniar)
#plot percent of missing values for each column
gg_miss_var(dat, show_pct = T)
We can count the number of NAs of the key flux variables. This is useful to keep track of as we go through the QC processing.
#count current nas
co2_na <- round(sum(is.na(dat$FC_1_1_1) )/length(dat$FC_1_1_1) * 100,1)
ch4_na <- round(sum(is.na(dat$FCH4_1_1_1))/length(dat$FCH4_1_1_1) * 100,1)
h_na <- round(sum(is.na(dat$H_1_1_1) )/length(dat$H_1_1_1) * 100,1)
le_na <- round(sum(is.na(dat$LE_1_1_1) )/length(dat$LE_1_1_1) * 100,1)
tau_na <- round(sum(is.na(dat$TAU_1_1_1) )/length(dat$TAU_1_1_1) * 100,1)
#put them all together in a data frame
nas <- data.frame(co2_na, ch4_na, h_na, le_na, tau_na)
rm(co2_na, ch4_na, h_na, le_na, tau_na)
#package for reshaping data
library(reshape2)
#reshape data
nas <- melt(nas)
#give na column a sensible name. We will add more columns as we lose more data in the next steps
colnames(nas)[2] <- "Inital_missing"
#look at the nas
nas
## variable Inital_missing
## 1 co2_na 21.5
## 2 ch4_na 58.8
## 3 h_na 10.3
## 4 le_na 21.7
## 5 tau_na 10.3
The output files include quality flags for all fluxes. The flag can be 0 (“high quality”); 1 (“medium”) or 2 (“low”). See Foken (2004) for more information. Normally we remove all data flagged with “2”, but keep that with a “0” or “1”. This can be different according to the specific needs of your analysis.
library(tidyselect)
library(tidyverse)
#convert foken flag values to factor
dat <- dat %>%
mutate_at(c(vars_select(colnames(dat), contains("SSITC"))), as.factor)
#plot fluxes some of the flux variables, color = foken number
p <- ggplot(dat, aes(x = Datetime))+
theme_bw()+
scale_color_manual(values = c("green","darkgreen","orange"))+
labs(x = "")
co2 <- p +
geom_point(aes(y = FC_1_1_1, col = FC_SSITC_TEST_1_1_1))
ch4 <- p +
geom_point(aes(y = FCH4_1_1_1, col = FCH4_SSITC_TEST_1_1_1))
#library for plotting multiple figures
library(cowplot)
#plot the figures
plot_grid(co2, ch4, ncol = 1)
#remove fluxes with foken = 2
dat$FC_1_1_1[dat$FC_SSITC_TEST_1_1_1 == 2] <- NA
dat$FCH4_1_1_1[dat$FCH4_SSITC_TEST_1_1_1 == 2] <- NA
dat$TAU_1_1_1[dat$TAU_SSITC_TEST_1_1_1 ==2] <- NA
dat$H_1_1_1[dat$H_SSITC_TEST_1_1_1 == 2] <- NA
#count nas after removing fokens
co2_foken <- round((sum(is.na(dat$FC_1_1_1)) )/ length(dat$FC_1_1_1) * 100,1) - nas$Inital_missing[1]
ch4_foken <- round((sum(is.na(dat$FCH4_1_1_1)))/ length(dat$FCH4_1_1_1)* 100,1) - nas$Inital_missing[2]
h_foken <- round((sum(is.na(dat$H_1_1_1)) )/ length(dat$H_1_1_1) * 100,1) - nas$Inital_missing[3]
le_foken <- round((sum(is.na(dat$LE_1_1_1)) )/ length(dat$LE_1_1_1) * 100,1) - nas$Inital_missing[4]
tau_foken <- round((sum(is.na(dat$TAU_1_1_1)) )/ length(dat$TAU_1_1_1) * 100,1) - nas$Inital_missing[5]
fokens <- data.frame(co2_foken, ch4_foken, h_foken, le_foken, tau_foken)
fokens <- melt(fokens)
#add the amount of data removed due to high foken flag to the 'nas' dataframe
nas <- cbind(nas, fokens[,2])
colnames(nas)[3] <- "foken"
#remove everything except dat and the list of missing values
rm(list = setdiff(ls(), c("dat","nas")))
nas
## variable Inital_missing foken
## 1 co2_na 21.5 5.9
## 2 ch4_na 58.8 5.8
## 3 h_na 10.3 7.7
## 4 le_na 21.7 0.0
## 5 tau_na 10.3 2.1
#replot fluxes some of the flux variables, color = foken number
p <- ggplot(dat, aes(x = Datetime))+
theme_bw()+
scale_color_manual(values = c("green","darkgreen","orange"))+
labs(x = "")
co2 <- p +
geom_point(aes(y = FC_1_1_1, col = FC_SSITC_TEST_1_1_1))
ch4 <- p +
geom_point(aes(y = FCH4_1_1_1, col = FCH4_SSITC_TEST_1_1_1))
plot_grid(co2, ch4, ncol = 1)
It is important to QC the meteorological data before attempting more with the flux data, as this is used to gap-fill fluxes later
The NWFP has several extra sources of meteorological and soil data that can be utilised to fill gaps in the EC meterological data and ensure quality. Here, we add data from: - NWFP soil moisture stations and met station - CEH COSMOS station (located on the NWFP) - ECMWF ERA-5 modelled data It may also be a good idea to add data from the other EC towers as an extra local source. This may be added here later.
# Load data. This file contains only catchment 4 data.
sms <- read.csv("W:/Documents/EC Advanced QC/sms_2020-01-01_2021-01-01.csv",
stringsAsFactors = TRUE)
# If you have more than one catchment in the data, filter.
sms <- sms %>% dplyr::select(contains("Datetime"),
contains("Catchment.4"))
# Tidy up column names
colnames(sms)
## [1] "Datetime"
## [2] "Precipitation..mm...Catchment.4.After..2013.08.13."
## [3] "Precipitation..mm...Catchment.4.After..2013.08.13..Quality"
## [4] "Precipitation..mm...Catchment.4.After..2013.08.13..Quality.Last.Modified"
## [5] "Soil.Temperature...15cm.Depth..oC...Catchment.4.After..2013.08.13."
## [6] "Soil.Temperature...15cm.Depth..oC...Catchment.4.After..2013.08.13..Quality"
## [7] "Soil.Temperature...15cm.Depth..oC...Catchment.4.After..2013.08.13..Quality.Last.Modified"
## [8] "Soil.Moisture...10cm.Depth......Catchment.4.After..2013.08.13."
## [9] "Soil.Moisture...10cm.Depth......Catchment.4.After..2013.08.13..Quality"
## [10] "Soil.Moisture...10cm.Depth......Catchment.4.After..2013.08.13..Quality.Last.Modified"
colnames(sms)[(names(sms) == "Precipitation..mm...Catchment.4.After..2013.08.13.")] <- "Precip_SMS"
colnames(sms)[(names(sms) == "Precipitation..mm...Catchment.4.After..2013.08.13..Quality")] <- "PrecipQC_SMS"
colnames(sms)[(names(sms) == "Precipitation..mm...Catchment.4.After..2013.08.13..Quality.Last.Modified")] <- "PrecipQCDate_SMS"
colnames(sms)[(names(sms) == "Soil.Temperature...15cm.Depth..oC...Catchment.4.After..2013.08.13.")] <- "TS_SMS"
colnames(sms)[(names(sms) == "Soil.Temperature...15cm.Depth..oC...Catchment.4.After..2013.08.13..Quality" )] <- "TSQC_SMS"
colnames(sms)[(names(sms) == "Soil.Temperature...15cm.Depth..oC...Catchment.4.After..2013.08.13..Quality.Last.Modified")] <- "TSQCDate_SMS"
colnames(sms)[(names(sms) == "Soil.Moisture...10cm.Depth......Catchment.4.After..2013.08.13.")] <- "SWC_SMS"
colnames(sms)[(names(sms) == "Soil.Moisture...10cm.Depth......Catchment.4.After..2013.08.13..Quality" )] <- "SWCQC_SMS"
colnames(sms)[(names(sms) == "Soil.Moisture...10cm.Depth......Catchment.4.After..2013.08.13..Quality.Last.Modified")] <- "SWCQCDate_SMS"
# Convert datetime to POSIX object
sms$Datetime <- as.POSIXct(sms$Datetime,
format="%Y/%m/%d %H:%M:%S", tz="UTC")
# SMS data is in 15-minute timesteps. We only need half-hourly to compare to the EC data.
# For precipitation, sum each 15min to 30min:
sms_30min <- sms %>% select(contains(c("Datetime", "Precip")))
library(lubridate)
sms_30min$Floor <- floor_date(sms_30min$Datetime, "30 minutes")
sum_30min_sum <- aggregate(x = list(sms_30min$Precip_SMS),
by = list(sms_30min$Floor), FUN = sum)
colnames(sum_30min_sum) <- c("Datetime", "Precip_SMS")
# For soil temperature and moisture, just select 30min timepoints (won't change much on 15min intervals)
sms <- sms[(which(grepl(":00:00|:30:00",sms$Datetime))),]
sms <- sms %>% select(c("Datetime", "TS_SMS", "SWC_SMS"))
sms <- merge(sms, sum_30min_sum, by = "Datetime")
# Convert units so they're the same as Li-Cor EC data.
# SMS and COSMOS soil water content are in %, licor is m^3/m^3
sms$SWC_SMS <- sms$SWC_SMS/100
#convert soil temp to K
sms$TS_SMS <- sms$TS_SMS+273.15
#Merge with dat
dat <- merge(dat, sms, by="Datetime", all.x=T, all.y = F)
head(dat)
## Datetime CO2_1_1_1 H2O_1_1_1 CH4_1_1_1 TAU_1_1_1
## 1 2020-01-01 00:30:00 NA NA NA NA
## 2 2020-01-01 01:00:00 NA NA NA NA
## 3 2020-01-01 01:30:00 NA NA NA NA
## 4 2020-01-01 02:00:00 NA NA NA NA
## 5 2020-01-01 02:30:00 NA NA NA NA
## 6 2020-01-01 03:00:00 NA NA NA NA
## TAU_SSITC_TEST_1_1_1 H_1_1_1 H_SSITC_TEST_1_1_1 LE_1_1_1 LE_SSITC_TEST_1_1_1
## 1 <NA> NA <NA> NA <NA>
## 2 <NA> NA <NA> NA <NA>
## 3 <NA> NA <NA> NA <NA>
## 4 <NA> NA <NA> NA <NA>
## 5 <NA> NA <NA> NA <NA>
## 6 <NA> NA <NA> NA <NA>
## FC_1_1_1 FC_SSITC_TEST_1_1_1 FCH4_1_1_1 FCH4_SSITC_TEST_1_1_1 WD_0_0_1
## 1 NA <NA> NA <NA> NA
## 2 NA <NA> NA <NA> NA
## 3 NA <NA> NA <NA> NA
## 4 NA <NA> NA <NA> NA
## 5 NA <NA> NA <NA> NA
## 6 NA <NA> NA <NA> NA
## WS_0_0_1 WS_MAX_0_0_1 U_SIGMA_0_0_1 V_SIGMA_0_0_1 W_SIGMA_0_0_1 USTAR_0_0_1
## 1 NA NA NA NA NA NA
## 2 NA NA NA NA NA NA
## 3 NA NA NA NA NA NA
## 4 NA NA NA NA NA NA
## 5 NA NA NA NA NA NA
## 6 NA NA NA NA NA NA
## MO_LENGTH_0_0_1 ZL_0_0_1 FETCH_MAX_0_0_1 FETCH_70_0_0_1 FETCH_80_0_0_1
## 1 NA NA NA NA NA
## 2 NA NA NA NA NA
## 3 NA NA NA NA NA
## 4 NA NA NA NA NA
## 5 NA NA NA NA NA
## 6 NA NA NA NA NA
## FETCH_90_0_0_1 PA_0_0_1 RH_0_0_1 TA_0_0_1 VPD_0_0_1 T_SONIC_0_0_1
## 1 NA NA NA NA NA NA
## 2 NA NA NA NA NA NA
## 3 NA NA NA NA NA NA
## 4 NA NA NA NA NA NA
## 5 NA NA NA NA NA NA
## 6 NA NA NA NA NA NA
## T_SONIC_SIGMA_0_0_1 LWIN_1_1_1 LWOUT_1_1_1 PPFD_1_1_1 RH_1_1_1 RN_1_1_1
## 1 NA NA NA NA NA NA
## 2 NA NA NA NA NA NA
## 3 NA NA NA NA NA NA
## 4 NA NA NA NA NA NA
## 5 NA NA NA NA NA NA
## 6 NA NA NA NA NA NA
## SHF_1_1_1 SHF_2_1_1 SHF_3_1_1 SWC_1_1_1 SWC_2_1_1 SWC_3_1_1 SWIN_1_1_1
## 1 NA NA NA NA NA NA NA
## 2 NA NA NA NA NA NA NA
## 3 NA NA NA NA NA NA NA
## 4 NA NA NA NA NA NA NA
## 5 NA NA NA NA NA NA NA
## 6 NA NA NA NA NA NA NA
## SWOUT_1_1_1 TA_1_1_1 TS_1_1_1 TS_2_1_1 TS_3_1_1 TS_SMS SWC_SMS Precip_SMS
## 1 NA NA NA NA NA 280.27 0.4076 0
## 2 NA NA NA NA NA 280.27 0.4076 0
## 3 NA NA NA NA NA 280.27 0.4071 0
## 4 NA NA NA NA NA 280.27 0.4071 0
## 5 NA NA NA NA NA 280.27 0.4071 0
## 6 NA NA NA NA NA 280.27 0.4071 0
rm(sms)
The Centre for Ecology and Hydrology (CEH, [https://www.ceh.ac.uk/]) has a COSMOS soil moisture station on the NWFP, on Catchment 2 (Great Field, red). This data is available on request to CEH [https://cosmos.ceh.ac.uk/content/requesting-data].
#### load data ####
cosmos <- read.csv("Z:/Data_COSMOS_Great_Field_PH/2014-2022_30min/NWYKE-2020-01-01-2020-12-31.csv",
skip = 6, header = F)
header <- read.csv("Z:/Data_COSMOS_Great_Field_PH/2014-2022_30min/NWYKE-2020-01-01-2020-12-31.csv",
nrows = 1, skip = 3, header = F, stringsAsFactors = F)
colnames(cosmos) <- header
rm(header)
#convert datetime to POSIX format
cosmos$`parameter-id` <- as.POSIXct(cosmos$`parameter-id`, format = "%Y-%m-%d %H:%M:%S", tz = "UTC")
#select relevant columns
cosmos <- cosmos %>%
select(c("parameter-id","PRECIPITATION_LEVEL2","RH_LEVEL2", "TA_LEVEL2", "PA_LEVEL2", "LWIN_LEVEL2",
"SWIN_LEVEL2", "LWOUT_LEVEL2", "SWOUT_LEVEL2", "RN_LEVEL2", "WD_LEVEL2", "WS_LEVEL2",
"G1_LEVEL2", "G2_LEVEL2", "STP_TSOIL10_LEVEL2", "TDT1_TSOIL_LEVEL2", "TDT2_TSOIL_LEVEL2",
"TDT1_VWC_LEVEL2", "TDT2_VWC_LEVEL2"))
#rename columns
colnames(cosmos)[(names(cosmos) == "parameter-id")] <- "Datetime"
colnames(cosmos)[(names(cosmos) == "PRECIPITATION_LEVEL2")] <- "P_RAIN_COSMOS"
colnames(cosmos)[(names(cosmos) == "RH_LEVEL2")] <- "RH_COSMOS"
colnames(cosmos)[(names(cosmos) == "TA_LEVEL2")] <- "TA_COSMOS"
colnames(cosmos)[(names(cosmos) == "PA_LEVEL2")] <- "PA_COSMOS"
colnames(cosmos)[(names(cosmos) == "LWIN_LEVEL2")] <- "LWIN_COSMOS"
colnames(cosmos)[(names(cosmos) == "SWIN_LEVEL2")] <- "SWIN_COSMOS"
colnames(cosmos)[(names(cosmos) == "LWOUT_LEVEL2")] <- "LWOUT_COSMOS"
colnames(cosmos)[(names(cosmos) == "SWOUT_LEVEL2")] <- "SWOUT_COSMOS"
colnames(cosmos)[(names(cosmos) == "RN_LEVEL2")] <- "RN_COSMOS"
colnames(cosmos)[(names(cosmos) == "WD_LEVEL2")] <- "WD_COSMOS"
colnames(cosmos)[(names(cosmos) == "WS_LEVEL2")] <- "WS_COSMOS"
colnames(cosmos)[(names(cosmos) == "G1_LEVEL2")] <- "SHF_COSMOS_1"
colnames(cosmos)[(names(cosmos) == "G2_LEVEL2")] <- "SHF_COSMOS_2"
colnames(cosmos)[(names(cosmos) == "STP_TSOIL10_LEVEL2")] <- "TS_COSMOS_1"
colnames(cosmos)[(names(cosmos) == "TDT1_TSOIL_LEVEL2")] <- "TS_COSMOS_2"
colnames(cosmos)[(names(cosmos) == "TDT2_TSOIL_LEVEL2")] <- "TS_COSMOS_3"
colnames(cosmos)[(names(cosmos) == "TDT1_VWC_LEVEL2")] <- "SWC_COSMOS_1"
colnames(cosmos)[(names(cosmos) == "TDT2_VWC_LEVEL2")] <- "SWC_COSMOS_2"
#convert air temp to K
cosmos$TA_COSMOS <- cosmos$TA_COSMOS + 273.15
#COSMOS air pressure is in hPa, licor is kPa
cosmos$PA_COSMOS <- cosmos$PA_COSMOS/10
# SMS and COSMOS soil water content are in %, licor is m^3/m^3
cosmos$SWC_COSMOS_1 <- cosmos$SWC_COSMOS_1/100
cosmos$SWC_COSMOS_2 <- cosmos$SWC_COSMOS_2/100
#convert soil temp to K
cosmos$TS_COSMOS_1 <- cosmos$TS_COSMOS_1+273.15
cosmos$TS_COSMOS_2 <- cosmos$TS_COSMOS_2+273.15
cosmos$TS_COSMOS_3 <- cosmos$TS_COSMOS_3+273.15
#merge with EC dataframe
dat <- merge(dat, cosmos, by = "Datetime", all.x = T, all.y = F)
rm(cosmos)
The NWFP has a meteorology station, data from which is available on the NWFP portal.
#load data
met <- read.csv("W:/Documents/EC Advanced QC/met_2020-01-01_2021-01-01.csv")
#select relevant columns
met <- met %>%
select(c("Datetime", "Air.Temperature..oC...Site.","Relative.Humidity....RH...Site.",
"Wind.Speed..km.h...Site.","Wind.Direction..o...Site.", "Solar.Radiation..W.m2...Site."))
#convert datetime to POSIX format
met$Datetime <- as.POSIXct(met$Datetime,
format="%Y/%m/%d %H:%M:%S", tz="UTC")
#Met data is in 15-minute timesteps. We only need half-hourly to compare to the EC data
met <- met[(which(grepl(":00:00|:30:00",met$Datetime))),]
#Rename columns
colnames(met)[(names(met) == "Solar.Radiation..W.m2...Site.")] <- "RN_MET"
colnames(met)[(names(met) == "Relative.Humidity....RH...Site.")] <- "RH_MET"
colnames(met)[(names(met) == "Air.Temperature..oC...Site.")] <- "TA_MET"
colnames(met)[(names(met) == "Wind.Direction..o...Site.")] <- "WD_MET"
colnames(met)[(names(met) == "Wind.Speed..km.h...Site.")] <- "WS_MET"
#convert temp to K
met$TA_MET <- met$TA_MET + 273.15
# MET wind speed is in km/h, licor is m/s
met$WS_MET <- met$WS_MET * 0.277778
#merge with dat
dat <- merge(dat, met, by = "Datetime", all.x = T, all.y = F)
head(dat)
## Datetime CO2_1_1_1 H2O_1_1_1 CH4_1_1_1 TAU_1_1_1
## 1 2020-01-01 00:30:00 NA NA NA NA
## 2 2020-01-01 01:00:00 NA NA NA NA
## 3 2020-01-01 01:30:00 NA NA NA NA
## 4 2020-01-01 02:00:00 NA NA NA NA
## 5 2020-01-01 02:30:00 NA NA NA NA
## 6 2020-01-01 03:00:00 NA NA NA NA
## TAU_SSITC_TEST_1_1_1 H_1_1_1 H_SSITC_TEST_1_1_1 LE_1_1_1 LE_SSITC_TEST_1_1_1
## 1 <NA> NA <NA> NA <NA>
## 2 <NA> NA <NA> NA <NA>
## 3 <NA> NA <NA> NA <NA>
## 4 <NA> NA <NA> NA <NA>
## 5 <NA> NA <NA> NA <NA>
## 6 <NA> NA <NA> NA <NA>
## FC_1_1_1 FC_SSITC_TEST_1_1_1 FCH4_1_1_1 FCH4_SSITC_TEST_1_1_1 WD_0_0_1
## 1 NA <NA> NA <NA> NA
## 2 NA <NA> NA <NA> NA
## 3 NA <NA> NA <NA> NA
## 4 NA <NA> NA <NA> NA
## 5 NA <NA> NA <NA> NA
## 6 NA <NA> NA <NA> NA
## WS_0_0_1 WS_MAX_0_0_1 U_SIGMA_0_0_1 V_SIGMA_0_0_1 W_SIGMA_0_0_1 USTAR_0_0_1
## 1 NA NA NA NA NA NA
## 2 NA NA NA NA NA NA
## 3 NA NA NA NA NA NA
## 4 NA NA NA NA NA NA
## 5 NA NA NA NA NA NA
## 6 NA NA NA NA NA NA
## MO_LENGTH_0_0_1 ZL_0_0_1 FETCH_MAX_0_0_1 FETCH_70_0_0_1 FETCH_80_0_0_1
## 1 NA NA NA NA NA
## 2 NA NA NA NA NA
## 3 NA NA NA NA NA
## 4 NA NA NA NA NA
## 5 NA NA NA NA NA
## 6 NA NA NA NA NA
## FETCH_90_0_0_1 PA_0_0_1 RH_0_0_1 TA_0_0_1 VPD_0_0_1 T_SONIC_0_0_1
## 1 NA NA NA NA NA NA
## 2 NA NA NA NA NA NA
## 3 NA NA NA NA NA NA
## 4 NA NA NA NA NA NA
## 5 NA NA NA NA NA NA
## 6 NA NA NA NA NA NA
## T_SONIC_SIGMA_0_0_1 LWIN_1_1_1 LWOUT_1_1_1 PPFD_1_1_1 RH_1_1_1 RN_1_1_1
## 1 NA NA NA NA NA NA
## 2 NA NA NA NA NA NA
## 3 NA NA NA NA NA NA
## 4 NA NA NA NA NA NA
## 5 NA NA NA NA NA NA
## 6 NA NA NA NA NA NA
## SHF_1_1_1 SHF_2_1_1 SHF_3_1_1 SWC_1_1_1 SWC_2_1_1 SWC_3_1_1 SWIN_1_1_1
## 1 NA NA NA NA NA NA NA
## 2 NA NA NA NA NA NA NA
## 3 NA NA NA NA NA NA NA
## 4 NA NA NA NA NA NA NA
## 5 NA NA NA NA NA NA NA
## 6 NA NA NA NA NA NA NA
## SWOUT_1_1_1 TA_1_1_1 TS_1_1_1 TS_2_1_1 TS_3_1_1 TS_SMS SWC_SMS Precip_SMS
## 1 NA NA NA NA NA 280.27 0.4076 0
## 2 NA NA NA NA NA 280.27 0.4076 0
## 3 NA NA NA NA NA 280.27 0.4071 0
## 4 NA NA NA NA NA 280.27 0.4071 0
## 5 NA NA NA NA NA 280.27 0.4071 0
## 6 NA NA NA NA NA 280.27 0.4071 0
## P_RAIN_COSMOS RH_COSMOS TA_COSMOS PA_COSMOS LWIN_COSMOS SWIN_COSMOS
## 1 0 90.5 279.312 101.0200 339.0 -0.407
## 2 0 91.9 279.216 101.0157 340.1 -0.303
## 3 0 92.4 279.151 101.0028 340.2 -0.264
## 4 0 92.6 279.150 100.9738 337.8 -0.381
## 5 0 93.1 279.040 100.9869 338.4 -0.423
## 6 0 93.4 278.919 100.9732 339.6 -0.357
## LWOUT_COSMOS SWOUT_COSMOS RN_COSMOS WD_COSMOS WS_COSMOS SHF_COSMOS_1
## 1 344.0 0.058 -5.465 119.64802 1.319 -3.48590
## 2 344.0 0.103 -4.306 112.12532 1.211 -3.40073
## 3 344.0 0.128 -4.192 88.37720 1.341 -3.55727
## 4 343.7 0.187 -6.468 85.72376 1.375 -3.56768
## 5 343.5 0.061 -5.584 76.29304 1.390 -3.60690
## 6 343.3 0.102 -4.159 71.40737 1.298 -3.67597
## SHF_COSMOS_2 TS_COSMOS_1 TS_COSMOS_2 TS_COSMOS_3 SWC_COSMOS_1 SWC_COSMOS_2
## 1 -4.06361 280.316 280.35 280.35 0.5224 0.5446
## 2 -3.91849 280.361 280.35 280.35 0.5166 0.5446
## 3 -3.94397 280.359 280.40 280.35 0.5211 0.5446
## 4 -3.87514 280.356 280.40 280.33 0.5204 0.5446
## 5 -3.81961 280.353 280.35 280.35 0.5179 0.5453
## 6 -3.81690 280.363 280.40 280.35 0.5204 0.5440
## TA_MET RH_MET WS_MET WD_MET RN_MET
## 1 278.94 96.10 1.1194453 140.58 7.81
## 2 278.91 96.27 0.8638896 110.00 8.12
## 3 278.90 96.35 1.0305564 123.40 8.12
## 4 278.86 96.69 0.8638896 109.18 8.30
## 5 278.82 96.78 0.8083340 76.05 8.12
## 6 278.73 96.94 0.7138895 91.54 8.30
#adding VPD measurements
#Vapour Pressure Deficit is a measure of the difference between water vapour pressure at saturation and the actual water vapour #pressure. It is an important consideration for photosynthesis: rates decline when VPD increases. We will use it later for the
#gapfilling
dat$VPD_MET <- ((100-dat$RH_MET)/100)*
(610.7*10^((7.5*(dat$TA_MET-273.15))/(237.3+dat$TA_MET))) # NWFP Met VPD
dat$VPD_1_1_1 <- ((100-dat$RH_1_1_1)/100)*
(610.7*10^((7.5*(dat$TA_1_1_1-273.15))/(237.3+dat$TA_1_1_1))) # EC biomet VPD
rm(met)
The ECMWF (European Centre for Medium-Range Weather Forecasts) provides hourly estimates of many variables, that can be a useful ‘sanity check’ for the meteorological data. It is not as good as the previous variables for gap-filling as it’s modelled rather that true observational and is at a more coarse scale (30km); but it can be used to gap-fill where other options are not available. Data is available from https://www.ecmwf.int/en/forecasts/datasets/reanalysis-datasets/era5 using either their online tool or an API and python script.
# This script takes ERA5 data for the NWFP location [-3.906,50.7698] (previously downloaded via an API) and stitches it together.
source("X:/Advanced analysis/2021/5_External data/ERA5/ERA dataframe2020to2021.R")
colnames(era)[1] <- "Datetime"
dat <- plyr::join_all(list(dat, era), by = "Datetime", type = "left")
rm(era)
The code below shows what is run when ‘ERA dataframe.R’, above, is run. There should be no need to run this; if the scripts ‘ERA dataframe.R’ and ‘RasterStacktoDataFrame.R’ are in the right place and the required ERA5 data has been downloaded, just running the above with do it all.
library(raster)
library(ncdf4)
# Long/Lat location of North Wyke
pts <- SpatialPoints(cbind(-3.906,50.7698))
# List of variables as named in downloaded ERA dataset
varnames <- c("t2m","u10", "v10", "stl1", "slhf", "ssr", "str", "sp", "sshf", "ssrd", "strd", "tp", "swvl1")
# Varname descriptions
vars <- c("Air temperature 2m", "U wind component 10 metre", "V wind component 10 metre",
"Soil temperature level 1", "Surface latent heat flux", "Surface net solar radiation",
"Surface net thermal radiation", "Surface pressure", "Surface sensible heat flux",
"Surface solar radiation downwards", "Surface thermal radiation downwards",
"Total precipitation", "Volumetric soil water layer 1")
vars <- data.frame(cbind(vars, varnames))
#Function which takes netcdf files and converts them to a dataframe, taking only value from the
#gridcell specified in 'pts' above
#----------------------------------------------------------------------------------------------#
RasterStacktoDataFrame <- function(nc, vn, pts) {
ncfile <- stack(nc, varname = vn)
extractedData <- raster::extract(ncfile, pts, method = "simple")
dates <- colnames(extractedData)
df <- data.frame(cbind(dates, extractedData[1,]))
df$dates <- sub(pattern = "X", df$dates, replacement = "")
df$dates <- as.POSIXct(df$dates, format = "%Y.%m.%d.%H.%M.%S", tz = "Europe/London")
rownames(df) <- c()
as.numeric.factor <- function(x) {as.numeric(levels(x))[x]}
df$V2 <- as.numeric.factor(as.factor((df$V2)))
colnames(df) <- c("Datetime", vn)
return(df)
}
#----------------------------------------------------------------------------------------------#
#which files to use for RasterStacktoDataFrame
ncs <- list.files("X:/ERA-5 data/post Dec2020 download/", pattern = ".nc")
ncs <- paste0("X:/ERA-5 data/post Dec2020 download/", ncs)
era <- data.frame()
#Loop RasterStacktoDataFrame for all the variables named in 'varnames' above
for (nc in ncs) {
for(i in varnames) {
temp <- RasterStacktoDataFrame(nc, i, pts)
if (exists("era_temp"))
era_temp <- merge(era_temp, temp, by = "Datetime")
if (!exists("era_temp"))
era_temp <- temp
temp)
}
era <- rbind(era, era_temp)
era_temp)
}
pts,i,varnames)
nc,ncs,RasterStacktoDataFrame)
#Timezone gets converted to GMT/BST somewhere above, which is wrong
era <- era %>% arrange(Datetime)
attr(era$Datetime, "tzone") <- "UTC"
#Radiation is reported by ERA as daily cumulative - convert to hourly total
era$Date <- as.Date(era$Date, tz = "utc")
era$time <- strftime(era$Datetime, format ="%H:%M:%S", tz = "utc")
#ssr
#net solar radiation
era$SRN_ERA <- NA
#str
#net thermal radiation
era$TRN_ERA <- NA
#slhf
era$LE_ERA <- NA
#strd
era$LWIN_ERA <- NA
#sshf
era$H_ERA <- NA
#tp
era$P_RAIN_ERA<- NA
#ssrd
#sw_in
era$SWIN_ERA <- NA
era$Datetime2 <- era$Datetime - 3600
era$Date2 <- as.Date(era$Datetime2, tz = "UTC")
era$time2 <- strftime(era$Datetime2, format ="%H:%M:%S", tz = "utc")
era <- era %>%
group_by(Date2) %>%
mutate(Diff_ssr = ssr - lag(ssr)) %>%
mutate(Diff_str = str - lag(str)) %>%
mutate(Diff_slhf = slhf - lag(slhf)) %>%
mutate(Diff_strd = strd - lag(strd)) %>%
mutate(Diff_sshf = sshf - lag(sshf)) %>%
mutate(Diff_tp = tp - lag(tp)) %>%
mutate(Diff_ssrd = ssrd - lag(ssrd))
era$SRN_ERA <- ifelse(era$time2 == "00:00:00", era$ssr,
era$Diff_ssr)
era$TRN_ERA <- ifelse(era$time2 == "00:00:00", era$str,
era$Diff_str)
era$LE_ERA <- ifelse(era$time2 == "00:00:00", era$slhf,
era$Diff_slhf)
era$LWIN_ERA <- ifelse(era$time2 == "00:00:00", era$strd,
era$Diff_strd)
era$H_ERA <- ifelse(era$time2 == "00:00:00", era$sshf,
era$Diff_sshf)
era$P_RAIN_ERA<- ifelse(era$time2 == "00:00:00", era$tp,
era$Diff_tp)
era$SWIN_ERA <- ifelse(era$time2 == "00:00:00", era$ssrd,
era$Diff_ssrd)
#LE stored as flux from air to ground; in Jm2.
#Need to change to ground to air and Wm2
era$LE_ERA <- era$LE_ERA * -1
era$LE_ERA <- era$LE_ERA /3600
#NETRAD is solar + thermal
#Need to convert from Jm2 as well
era$RN_ERA <- era$SRN_ERA + era$TRN_ERA
era$RN_ERA <- era$RN_ERA / 3600
#LW_IN is in Jm2
era$LWIN_ERA <- era$LWIN_ERA/3600
#H stored as flux from air to ground; in Jm2.
era$H_ERA <- era$H_ERA/3600
era$H_ERA <- era$H_ERA * -1
#SWIN is in Jm2
era$SWIN_ERA <- era$SWIN_ERA/3600
#rain is in m; convert to mm
era$P_RAIN_ERA <- era$P_RAIN_ERA * 1000
#colnames only
era$TA_ERA <- era$t2m
era$PA_ERA <- era$sp
era <- era[,c(1,17:23,34:36)]
vars)
era <- data.frame(era)
As there are 3 soil moisture, temperature and heat flux probes from the EC system, these can be condensed into one measurement. Any that are not working can be removed, those that are working can be averaged.
#Soil heat flux - 3 sensors
dat %>%
subset(select = c("Datetime", vars_select(colnames(dat), contains("SHF_"))))%>%
melt(id.vars = "Datetime")%>%
ggplot(aes(x = Datetime, y = value))+
geom_point()+
theme_bw()+
facet_wrap(.~variable, ncol = 1)
#Can see two of the EC heat flux sensors (SHF_1_1_1 and SHF_3_1_1) are reasonably similar. They can be condensed to one EC heat flux value:
dat$SHF_mean <- rowMeans(subset(dat, select = c(SHF_1_1_1, SHF_3_1_1)), na.rm = T)
#SHF_2_1_1 is missing, so we don't use this.
#Can also see there are a few gaps in the EC data. We can use the COSMOS data to fill these later. Can see here that SHF_COSMOS_1 looks to be very similar to our own measurements.
#Soil water content - 3 sensors
dat %>%
subset(select = c("Datetime", vars_select(colnames(dat), contains("SWC_"))))%>%
melt(id.vars = "Datetime")%>%
ggplot(aes(x = Datetime, y = value))+
geom_point()+
theme_bw()+
facet_wrap(.~variable, ncol = 1, scales = "free")
#Can see that none of the soil water content sensors from the tower are working for this time period. We can instead use SWC_SMS, from the same field, and use the COSMOS data to fill any gaps in that.
#dat$SWC_mean <- rowMeans(subset(dat, select = c(SWC_1_1_1, SWC_2_1_1, SWC_3_1_1)), na.rm = T)
dat$SWC_mean <- NA
#Soil temperature - 3 sensors
dat %>%
subset(select = c("Datetime", vars_select(colnames(dat), contains("TS_"))))%>%
melt(id.vars = "Datetime")%>%
ggplot(aes(x = Datetime, y = value))+
geom_point()+
theme_bw()+
facet_wrap(.~variable, ncol = 1, scales = "free")
#Again none are good - do not merge. We can use TS_SMS instead as above.
#dat$TS_mean <- rowMeans(subset(dat, select = c(TS_1_1_1, TS_2_1_1, TS_3_1_1)), na.rm = T)
dat$TS_mean <- NA
Using the extra data we’ve added, we can fill gaps in the EC meteorological data, and replace any faulty data. Where possible, it is usually preferable to fill/replace using data from within the same field, or as close as possible to the original EC data. Not all of these meteorological variables will be used to generate the footprints and gap-filled fluxes described later, but they are all helpful variables to have when interpreting/writing up the data.
library(rlang)
# This function sets out the graphs to see how the different data sources compare
source("D:/R Functions/EC_QC_graphs.R")
The code below shows what is included in the above function ‘EC_QC_graphs.R’
BMG_graphs <- function (name = NA, primary = NA, auxilliary = NA, auxilliary2 = NA) {
#define primary variable (i.e. from the EC tower)
sym_primary <- sym(primary)
#define first variable to compare (i.e. best one to use for gap-filling, usually closest)
sym_auxilliary <- if (!is.na(auxilliary)) sym(auxilliary)
#define secondary variable for comparisons (another field, or modelled data)
sym_auxilliary2 <- if (!is.na(auxilliary2)) sym(auxilliary2)
library(ggplot2)
library(scales)
library(plyr)
library(dplyr)
library(ggpubr)
library(rlang)
#max/min for axis
x <- if (any(colnames(dat) == primary, na.rm = T)) min(dat[,primary], na.rm = T)
y <- if (any(colnames(dat) == auxilliary, na.rm = T)) min(dat[,auxilliary], na.rm = T)
z <- if (any(colnames(dat) == auxilliary2, na.rm = T)) min(dat[,auxilliary2], na.rm = T)
amin <- min(x,y,z)
x,y,z)
x <- if (any(colnames(dat) == primary, na.rm = T)) max(dat[,primary], na.rm = T)
y <- if (any(colnames(dat) == auxilliary, na.rm = T)) max(dat[,auxilliary], na.rm = T)
z <- if (any(colnames(dat) == auxilliary2, na.rm = T)) max(dat[,auxilliary2], na.rm = T)
amax <- max(x,y,z)
x,y,z)
## correlation graphs
if (!is.na(auxilliary2)) {
vauxilliary2 <- ggplot(dat, aes(x = dat[,auxilliary2], y = dat[,primary]))+
geom_point()+
coord_fixed(ratio = 1, xlim = c(amin,amax), ylim = c(amin,amax), expand = TRUE, clip = "on")+
xlab(label = "auxilliary2")+
ylab(label = "primary")+
theme_bw()
}
if (!is.na(auxilliary)) {
vauxilliary <- ggplot(dat, aes(x = dat[,auxilliary], y = dat[,primary]))+
geom_point()+
coord_fixed(ratio = 1, xlim = c(amin,amax), ylim = c(amin,amax), expand = TRUE, clip = "on")+
xlab(label = "auxilliary")+
ylab(label = "primary")+
theme_bw()
}
if (!is.na(auxilliary2)) {
if (!is.na(auxilliary)) {
auxilliaryvauxilliary2 <- ggplot(dat, aes(x = dat[,auxilliary2], y = dat[,auxilliary]))+
geom_point()+
coord_fixed(ratio = 1, xlim = c(amin, amax), ylim = c(amin, amax), expand = TRUE, clip = "on")+
xlab(label = "auxilliary2")+
ylab(label = "auxilliary")+
theme_bw()
}
}
## timeseries graphs
ts1 <- ggplot(dat, aes(x = Datetime))+
geom_line(aes(y = dat[,primary]), col = "purple")+
coord_cartesian(ylim = c(amin,amax))+
scale_x_datetime( breaks = date_breaks("2 weeks"),
labels = date_format("%d/%m", tz="UTC"),
expand = c(0,0))+
ylab(label = "primary")+
xlab(label = "Time [Hr]")+
theme_bw()
if (!is.na(auxilliary)) {
ts2 <- ggplot(dat[!is.na(dat[,auxilliary]),], aes(x = Datetime))+
geom_line(na.rm = T, aes(y = dat[!is.na(dat[,auxilliary]),auxilliary]), col = "green")+
coord_cartesian(ylim = c(amin,amax))+
scale_x_datetime( breaks = date_breaks("2 weeks"),
labels = date_format("%d/%m", tz="UTC"),
expand = c(0,0))+
ylab(label = "auxilliary")+
xlab(label = "Datetime [d/m]")+
theme_bw()
}
if (!is.na(auxilliary2)) {
ts3 <- ggplot(dat[!is.na(dat[,auxilliary2]),], aes(x = Datetime))+
geom_line(na.rm = T, aes(y = dat[!is.na(dat[,auxilliary2]),auxilliary2]), col = "orange")+
coord_cartesian(ylim = c(amin,amax))+
scale_x_datetime( breaks = date_breaks("2 weeks"),
labels = date_format("%d/%m", tz="UTC"),
expand = c(0,0))+
ylab(label = "auxilliary2")+
xlab(label = "Datetime [d/m]")+
theme_bw()
}
#convert datetime to POSIX format
dat$time <- as.POSIXct(strftime(dat$Datetime, format = "%H:%M:%S", tz = "UTC"),
format="%H:%M:%S", tz = "utc")
#funtion for getting mean value and removing nas
mean.na <- function(x) {
mean(x, na.rm = T)
}
#mean value at each time of day. Used for diel curve
means <- dat %>%
group_by(time) %>%
dplyr::summarize(mean_1_1_1 = mean.na(!!sym_primary),
mean_0_1_1 = mean.na(!!sym_auxilliary2),
mean_9_1_1 = mean.na(!!sym_auxilliary))
#plot average daily curve with primary and auxilliary variables
means_p <- ggplot(means)+
geom_point(aes(y = mean_1_1_1, x = time), col = "purple", fill = "purple", size = 5, shape = 21, alpha = 0.4)+
geom_point(aes(y = mean_0_1_1, x = time), col = "orange", fill = "orange", size = 5, shape = 21, alpha = 0.4)+
geom_point(aes(y = mean_9_1_1, x = time), col = "green", fill = "green", size = 5, shape = 21, alpha = 0.4)+
scale_x_datetime( breaks = date_breaks("60 mins"),
labels = date_format("%H:%M", tz="UTC"),
expand = c(0,0))+
xlab(label = "Time")+
ylab(label = name)+
theme_bw()
#list of correlation graphs we've generated
corrs <- list( if(exists("ts1")) ts1,
if(exists("ts2")) ts2,
if(exists("ts3")) ts3)
#list of diel curve graphs we've generated
ts <- list( if(exists("vauxilliary2")) vauxilliary2,
if(exists("vauxilliary")) vauxilliary,
if(exists("auxilliaryvauxilliary2")) auxilliaryvauxilliary2)
#plot the graphs
p <- ggarrange(ggarrange(plotlist=ts, ncol = 3), ggarrange(ggarrange(plotlist=corrs, ncol = 1),means_p, ncol = 2), ncol = 1)
annotate_figure(p, top = text_grob(name, color = "red", face = "bold", size = 30))
}
Run the ‘EC_QC_graphs’ function for each of the meteorological variables ### 4.1.1 SWIN
# SWIN ---
BMG_graphs(name = "Shortwave Radiation Incoming (SWIN)", primary = "SWIN_1_1_1", auxilliary = "SWIN_COSMOS", auxilliary2 = "SWIN_ERA")
BMG_graphs(name = "Shortwave Radiation Outgonig (SWOUT)", primary = "SWOUT_1_1_1", auxilliary = "SWOUT_COSMOS", auxilliary2 = NA)
BMG_graphs(name = "Air Pressure (PA)", primary = "PA_0_0_1", auxilliary = "PA_COSMOS", auxilliary2 = "PA_ERA")
# LE ---
BMG_graphs(name = "Latent Energy (LE)", primary = "LE_1_1_1", auxilliary = NA, auxilliary2 = "LE_ERA")
BMG_graphs(name = "Longwave Radiation incoming (LW-IN)", primary = "LWIN_1_1_1", auxilliary = "LWIN_COSMOS", auxilliary2 = "LWIN_ERA")
BMG_graphs(name = "Longwave Radiation Outgoing (LW_out)", primary = "LWOUT_1_1_1", auxilliary = "LWOUT_COSMOS", auxilliary2 = NA)
BMG_graphs(name = "Net Radiation (RN)", primary = "RN_1_1_1", auxilliary = "RN_COSMOS", auxilliary2 = "RN_ERA")
BMG_graphs(name = "Sensible Heat (H)", primary = "H_1_1_1", auxilliary = NA, auxilliary2 = "H_ERA")
BMG_graphs(name = "Relative Humidity (RH)", primary = "RH_1_1_1", auxilliary = "RH_COSMOS", auxilliary2 = NA)
BMG_graphs(name = "Soil Heat Flux (SHF)", primary = "SHF_mean", auxilliary = "SHF_COSMOS_1", auxilliary2 = NA)
BMG_graphs(name = "Soil Water Content (SWC)", primary = "SWC_SMS", auxilliary = "SWC_COSMOS_2", auxilliary2 = NA)
BMG_graphs(name = "Air Temperature (TA)", primary = "TA_1_1_1", auxilliary = "TA_COSMOS", auxilliary2 = "TA_ERA")
#COSMOS_2 is at 5cm, same as EC
BMG_graphs(name = "Soil Temperature (TS)", primary = "TS_SMS", auxilliary = "TS_COSMOS_2", auxilliary2 = NA)
#more cosmos also available
BMG_graphs(name = "Vapour Pressure Deficit (VPD)", primary = "VPD_1_1_1", auxilliary = "VPD_MET", auxilliary2 = NA)
BMG_graphs(name = "Wind direction (WD)", primary = "WD_0_0_1", auxilliary = "WD_COSMOS", auxilliary2 = "WD_MET")
BMG_graphs(name = "Wind Speed (WS)", primary = "WS_0_0_1", auxilliary = "WS_COSMOS", auxilliary2 = "WS_MET")
For shortwave Radiation incoming (SWIN) we can se there are some gaps in the primary (EC) dataset, but the axilliary data (COSMOS) is complete and looks to be a good fit. So, we can use this to fill gaps in the primary dataset.
#linear model of relationship between EC and COSMOS SWIN
SWIN_lm <- summary(lm(dat$SWIN_1_1_1 ~ dat$SWIN_COSMOS))
SWIN_lm
##
## Call:
## lm(formula = dat$SWIN_1_1_1 ~ dat$SWIN_COSMOS)
##
## Residuals:
## Min 1Q Median 3Q Max
## -300.956 0.523 1.376 2.433 293.244
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.1641619 0.1805432 -6.448 1.16e-10 ***
## dat$SWIN_COSMOS 0.9681861 0.0007187 1347.194 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 19.82 on 16531 degrees of freedom
## (1083 observations deleted due to missingness)
## Multiple R-squared: 0.991, Adjusted R-squared: 0.991
## F-statistic: 1.815e+06 on 1 and 16531 DF, p-value: < 2.2e-16
#Create new variable
dat$SWIN_filled <- NA
#Fill new variable with EC data where available, data modelled from lm if not
dat$SWIN_filled <- ifelse(is.na(dat$SWIN_1_1_1),
(SWIN_lm$coefficients[1] + SWIN_lm$coefficients[2] * dat$SWIN_COSMOS),
(dat$SWIN_1_1_1))
#graph to check fit is sensible
ggplot(dat, aes(x = Datetime))+
geom_point(aes(y = SWIN_1_1_1), col = "purple")+
geom_point(aes(y = SWIN_filled), shape = 3, col = "grey")+
geom_line(aes(y = SWIN_filled), col = "grey")+
theme_bw()
rm(SWIN_lm)
Again, there are a few gaps in the EC dataset but the COSMOS data is complete and can be used to fill the gaps.
#linear model of relationship between EC and COSMOS SWIN
SWOUT_lm <- summary(lm(dat$SWOUT_1_1_1 ~ dat$SWOUT_COSMOS))
SWOUT_lm
##
## Call:
## lm(formula = dat$SWOUT_1_1_1 ~ dat$SWOUT_COSMOS)
##
## Residuals:
## Min 1Q Median 3Q Max
## -74.616 -1.432 -0.755 0.509 57.026
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.400351 0.066798 20.96 <2e-16 ***
## dat$SWOUT_COSMOS 1.052706 0.001272 827.33 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.295 on 16531 degrees of freedom
## (1083 observations deleted due to missingness)
## Multiple R-squared: 0.9764, Adjusted R-squared: 0.9764
## F-statistic: 6.845e+05 on 1 and 16531 DF, p-value: < 2.2e-16
#Create new variable
dat$SWOUT_filled <- NA
#Fill new variable with EC data where available, data modelled from lm if not
dat$SWOUT_filled <- ifelse(is.na(dat$SWOUT_1_1_1),
(SWOUT_lm$coefficients[1] + SWOUT_lm$coefficients[2] * dat$SWOUT_COSMOS),
(dat$SWOUT_1_1_1))
#graph to check fit is sensible
ggplot(dat, aes(x = Datetime))+
geom_point(aes(y = SWOUT_1_1_1), col = "purple")+
geom_point(aes(y = SWOUT_filled), shape = 3, col = "grey")+
geom_line(aes(y = SWOUT_filled), col = "grey")+
theme_bw()
rm(SWOUT_lm)
#ta data has some 'drop outs' that can be removed
library(anomalize)
library(tidyverse)
pa_anom<- dat %>%
as_tibble() %>%
drop_na(PA_0_0_1) %>%
time_decompose(PA_0_0_1, method = "stl", frequency = "1 hour", trend = "24 hours") %>%
anomalize(remainder) %>%
time_recompose() %>%
plot_anomalies(time_recomposed = TRUE, ncol = 3, alpha_dots = 0.5)
plot(pa_anom)
dat <- merge(dat, pa_anom$data[, c(1,8)], by = "Datetime", all = T)
dat$PA_0_0_1[dat$anomaly == "Yes"] <- NA
dat <- subset(dat, select=-c(anomaly))
PA_lm <- summary(lm(dat$PA_0_0_1 ~ dat$PA_COSMOS))
PA_lm
##
## Call:
## lm(formula = dat$PA_0_0_1 ~ dat$PA_COSMOS)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.97314 -0.27461 0.03108 0.27074 1.87679
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 31.323526 0.398434 78.62 <2e-16 ***
## dat$PA_COSMOS 0.685261 0.003999 171.36 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5069 on 13140 degrees of freedom
## (4474 observations deleted due to missingness)
## Multiple R-squared: 0.6909, Adjusted R-squared: 0.6908
## F-statistic: 2.936e+04 on 1 and 13140 DF, p-value: < 2.2e-16
#Create new variable
dat$PA_filled <- NA
#Fill new variable with EC data where available, data modelled from lm if not
dat$PA_filled <- ifelse(is.na(dat$PA_0_0_1),
(PA_lm$coefficients[1] + PA_lm$coefficients[2] * dat$PA_COSMOS),
(dat$PA_0_0_1))
#graph to check fit is sensible
ggplot(dat, aes(x = Datetime))+
geom_point(aes(y = PA_0_0_1), col = "purple")+
geom_point(aes(y = PA_filled), shape = 3, col = "grey")+
geom_line(aes(y = PA_filled), col = "grey")+
theme_bw()
rm(PA_lm, pa_anom)
#remove single outlier from EC data
dat[28, 9] <- NA
LE_lm <- summary(lm(dat$LE_1_1_1 ~ dat$LE_ERA))
LE_lm
##
## Call:
## lm(formula = dat$LE_1_1_1 ~ dat$LE_ERA)
##
## Residuals:
## Min 1Q Median 3Q Max
## -566.93 -12.51 3.42 10.15 719.56
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -8.733901 0.597242 -14.62 <2e-16 ***
## dat$LE_ERA 0.883567 0.007308 120.90 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 39.45 on 6614 degrees of freedom
## (11000 observations deleted due to missingness)
## Multiple R-squared: 0.6885, Adjusted R-squared: 0.6884
## F-statistic: 1.462e+04 on 1 and 6614 DF, p-value: < 2.2e-16
#Create new variable
dat$LE_filled <- NA
#Fill new variable with EC data where available, data modelled from lm if not
dat$LE_filled <- ifelse(is.na(dat$LE_1_1_1),
(LE_lm$coefficients[1] + LE_lm$coefficients[2] * dat$LE_ERA),
(dat$LE_1_1_1))
#graph to check fit is sensible
ggplot(dat, aes(x = Datetime))+
geom_point(aes(y = LE_1_1_1), col = "purple")+
geom_point(aes(y = LE_filled), shape = 3, col = "grey")+
geom_line(aes(y = LE_filled), col = "grey")+
theme_bw()
rm(LE_lm)
LWIN_lm <- summary(lm(dat$LWIN_1_1_1 ~ dat$LWIN_ERA))
LWIN_lm
##
## Call:
## lm(formula = dat$LWIN_1_1_1 ~ dat$LWIN_ERA)
##
## Residuals:
## Min 1Q Median 3Q Max
## -78.026 -11.297 1.161 11.650 65.086
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 71.294136 1.766123 40.37 <2e-16 ***
## dat$LWIN_ERA 0.814271 0.005324 152.93 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 19.05 on 8804 degrees of freedom
## (8810 observations deleted due to missingness)
## Multiple R-squared: 0.7265, Adjusted R-squared: 0.7265
## F-statistic: 2.339e+04 on 1 and 8804 DF, p-value: < 2.2e-16
#Create new variable
dat$LWIN_filled <- NA
#Fill new variable with EC data where available, data modelled from lm if not
dat$LWIN_filled <- ifelse(is.na(dat$LWIN_1_1_1),
(LWIN_lm$coefficients[1] + LWIN_lm$coefficients[2] * dat$LWIN_ERA),
(dat$LWIN_1_1_1))
#graph to check fit is sensible
ggplot(dat, aes(x = Datetime))+
geom_point(aes(y = LWIN_1_1_1), col = "purple")+
geom_point(aes(y = LWIN_filled), shape = 3, col = "grey")+
geom_line(aes(y = LWIN_filled), col = "grey")+
theme_bw()
rm(LWIN_lm)
Fill using COSMOS
#linear model of relationship between EC and COSMOS SWIN
LWOUT_lm <- summary(lm(dat$LWOUT_1_1_1 ~ dat$LWOUT_COSMOS))
LWOUT_lm
##
## Call:
## lm(formula = dat$LWOUT_1_1_1 ~ dat$LWOUT_COSMOS)
##
## Residuals:
## Min 1Q Median 3Q Max
## -18.241 -0.926 0.113 0.975 40.429
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -9.1644344 0.2885806 -31.76 <2e-16 ***
## dat$LWOUT_COSMOS 1.0225026 0.0007806 1309.85 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.093 on 16531 degrees of freedom
## (1083 observations deleted due to missingness)
## Multiple R-squared: 0.9905, Adjusted R-squared: 0.9905
## F-statistic: 1.716e+06 on 1 and 16531 DF, p-value: < 2.2e-16
#Create new variable
dat$LWOUT_filled <- NA
#Fill new variable with EC data where available, data modelled from lm if not
dat$LWOUT_filled <- ifelse(is.na(dat$LWOUT_1_1_1),
(LWOUT_lm$coefficients[1] + LWOUT_lm$coefficients[2] * dat$LWOUT_COSMOS),
(dat$LWOUT_1_1_1))
#graph to check fit is sensible
ggplot(dat, aes(x = Datetime))+
geom_point(aes(y = LWOUT_1_1_1), col = "purple")+
geom_point(aes(y = LWOUT_filled), shape = 3, col = "grey")+
geom_line(aes(y = LWOUT_filled), col = "grey")+
theme_bw()
rm(LWOUT_lm)
#linear model of relationship between EC and COSMOS SWIN
RN_lm <- summary(lm(dat$RN_1_1_1 ~ dat$RN_COSMOS))
RN_lm
##
## Call:
## lm(formula = dat$RN_1_1_1 ~ dat$RN_COSMOS)
##
## Residuals:
## Min 1Q Median 3Q Max
## -216.129 -4.330 3.884 7.074 227.421
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -7.314562 0.168981 -43.29 <2e-16 ***
## dat$RN_COSMOS 0.938419 0.001003 936.08 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 19.82 on 16531 degrees of freedom
## (1083 observations deleted due to missingness)
## Multiple R-squared: 0.9815, Adjusted R-squared: 0.9815
## F-statistic: 8.762e+05 on 1 and 16531 DF, p-value: < 2.2e-16
#Create new variable
dat$RN_filled <- NA
#Fill new variable with EC data where available, data modelled from lm if not
dat$RN_filled <- ifelse(is.na(dat$RN_1_1_1),
(RN_lm$coefficients[1] + RN_lm$coefficients[2] * dat$RN_COSMOS),
(dat$RN_1_1_1))
#graph to check fit is sensible
ggplot(dat, aes(x = Datetime))+
geom_point(aes(y = RN_1_1_1), col = "purple")+
geom_point(aes(y = RN_filled), shape = 3, col = "grey")+
geom_line(aes(y = RN_filled), col = "grey")+
theme_bw()
rm(RN_lm)
#linear model of relationship between EC and ERA SWIN
H_lm <- summary(lm(dat$H_1_1_1 ~ dat$H_ERA))
H_lm
##
## Call:
## lm(formula = dat$H_1_1_1 ~ dat$H_ERA)
##
## Residuals:
## Min 1Q Median 3Q Max
## -7380.1 -13.6 -1.8 8.1 3080.4
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.67726 1.59028 -0.426 0.67
## dat$H_ERA 0.67929 0.03371 20.154 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 140.8 on 7868 degrees of freedom
## (9746 observations deleted due to missingness)
## Multiple R-squared: 0.04909, Adjusted R-squared: 0.04897
## F-statistic: 406.2 on 1 and 7868 DF, p-value: < 2.2e-16
#Create new variable
dat$H_filled <- NA
#Fill new variable with EC data where available, data modelled from lm if not
dat$H_filled <- ifelse(is.na(dat$H_1_1_1),
(H_lm$coefficients[1] + H_lm$coefficients[2] * dat$H_ERA),
(dat$H_1_1_1))
#graph to check fit is sensible
ggplot(dat, aes(x = Datetime))+
geom_point(aes(y = H_1_1_1), col = "purple")+
geom_point(aes(y = H_filled), shape = 3, col = "grey")+
geom_line(aes(y = H_filled), col = "grey")+
theme_bw()
rm(H_lm)
#linear model of relationship between EC and COSMOS SWIN
RH_lm <- summary(lm(dat$RH_1_1_1 ~ dat$RH_COSMOS))
RH_lm
##
## Call:
## lm(formula = dat$RH_1_1_1 ~ dat$RH_COSMOS)
##
## Residuals:
## Min 1Q Median 3Q Max
## -85.537 -16.311 -4.173 6.791 101.471
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 192.67044 2.16472 89.00 <2e-16 ***
## dat$RH_COSMOS -1.99220 0.02483 -80.23 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 28.09 on 8695 degrees of freedom
## (8919 observations deleted due to missingness)
## Multiple R-squared: 0.4254, Adjusted R-squared: 0.4253
## F-statistic: 6437 on 1 and 8695 DF, p-value: < 2.2e-16
#Create new variable
dat$RH_filled <- NA
#Fill new variable with EC data where available, data modelled from lm if not
dat$RH_filled <- ifelse(is.na(dat$RH_1_1_1),
(RH_lm$coefficients[1] + RH_lm$coefficients[2] * dat$RH_COSMOS),
(dat$RH_1_1_1))
#graph to check fit is sensible
ggplot(dat, aes(x = Datetime))+
geom_point(aes(y = RH_1_1_1), col = "purple")+
geom_point(aes(y = RH_filled), shape = 3, col = "grey")+
geom_line(aes(y = RH_filled), col = "grey")+
theme_bw()
rm(RH_lm)
#linear model of relationship between EC and COSMOS
#better relationship with COSMOS_1, so using that
SHF_lm <- summary(lm(dat$SHF_1_1_1 ~ dat$SHF_COSMOS_1))
SHF_lm
##
## Call:
## lm(formula = dat$SHF_1_1_1 ~ dat$SHF_COSMOS_1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -9.6526 -1.0840 -0.0943 0.8941 11.6718
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.095794 0.014904 -6.427 1.33e-10 ***
## dat$SHF_COSMOS_1 0.578666 0.001586 364.940 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.915 on 16535 degrees of freedom
## (1079 observations deleted due to missingness)
## Multiple R-squared: 0.8896, Adjusted R-squared: 0.8896
## F-statistic: 1.332e+05 on 1 and 16535 DF, p-value: < 2.2e-16
#Create new variable
dat$SHF_filled <- NA
#Fill new variable with EC data where available, data modelled from lm if not
dat$SHF_filled <- ifelse(is.na(dat$SHF_1_1_1),
(SHF_lm$coefficients[1] + SHF_lm$coefficients[2] * dat$SHF_COSMOS_1),
(dat$SHF_1_1_1))
#graph to check fit is sensible
ggplot(dat, aes(x = Datetime))+
geom_point(aes(y = SHF_1_1_1), col = "purple")+
geom_point(aes(y = SHF_filled), shape = 3, col = "grey")+
geom_line(aes(y = SHF_filled), col = "grey")+
theme_bw()
rm(SHF_lm)
#don't have any valid EC measurements here, so just use the cosmos data
#Create new variable
dat$SWC_filled <- dat$SWC_COSMOS_1
#ta_1_1_1 data has some 'drop outs' that can be removed
plot(dat$TA_1_1_1 ~ dat$Datetime)
#OR use TA_0_0_1 instead (this is air temp from the CO2 analyser, TA_1_1_1 is the peripheral biomet sensor)
dat$TA_0_0_1 <- dat$TA_0_0_1 + 273.15
#linear model of relationship between EC and COSMOS SWIN
TA_lm <- summary(lm(dat$TA_0_0_1 ~ dat$TA_COSMOS))
TA_lm
##
## Call:
## lm(formula = dat$TA_0_0_1 ~ dat$TA_COSMOS)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.4014 -1.9576 0.1846 1.6945 6.9017
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 14.789063 0.868908 17.02 <2e-16 ***
## dat$TA_COSMOS 0.959018 0.003061 313.35 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.945 on 15754 degrees of freedom
## (1860 observations deleted due to missingness)
## Multiple R-squared: 0.8617, Adjusted R-squared: 0.8617
## F-statistic: 9.819e+04 on 1 and 15754 DF, p-value: < 2.2e-16
#Create new variable
dat$TA_filled <- NA
#Fill new variable with EC data where available, data modelled from lm if not
dat$TA_filled <- ifelse(is.na(dat$TA_0_0_1),
(TA_lm$coefficients[1] + TA_lm$coefficients[2] * dat$TA_COSMOS),
(dat$TA_0_0_1))
#graph to check fit is sensible
ggplot(dat, aes(x = Datetime))+
geom_point(aes(y = TA_0_0_1), col = "purple")+
geom_point(aes(y = TA_filled), shape = 3, col = "grey")+
geom_line(aes(y = TA_filled), col = "grey")+
theme_bw()
rm(TA_lm, ta_anom)
#don't have any valid EC measurements here, so just use the cosmos data
#Create new variable
#use cosmos_2 as that's at 5cm depth same as EC
dat$TS_filled <- dat$TS_COSMOS_2
#linear model of relationship between EC and COSMOS SWIN
VPD_lm <- summary(lm(dat$VPD_1_1_1 ~ dat$VPD_MET))
VPD_lm
##
## Call:
## lm(formula = dat$VPD_1_1_1 ~ dat$VPD_MET)
##
## Residuals:
## Min 1Q Median 3Q Max
## -206.27 0.85 17.42 25.09 554.12
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.150e+02 9.271e-01 124.00 <2e-16 ***
## dat$VPD_MET 2.047e-01 5.795e-03 35.31 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 66.68 on 8741 degrees of freedom
## (8873 observations deleted due to missingness)
## Multiple R-squared: 0.1249, Adjusted R-squared: 0.1248
## F-statistic: 1247 on 1 and 8741 DF, p-value: < 2.2e-16
#Create new variable
dat$VPD_filled <- NA
#Fill new variable with EC data where available, data modelled from lm if not
dat$VPD_filled <- ifelse(is.na(dat$VPD_1_1_1),
(VPD_lm$coefficients[1] + VPD_lm$coefficients[2] * dat$VPD_MET),
(dat$VPD_1_1_1))
#graph to check fit is sensible
ggplot(dat, aes(x = Datetime))+
geom_point(aes(y = VPD_1_1_1), col = "purple")+
geom_point(aes(y = VPD_filled), shape = 3, col = "grey")+
geom_line(aes(y = VPD_filled), col = "grey")+
theme_bw()
rm(VPD_lm)
#linear model of relationship between EC and COSMOS SWIN
WD_lm <- summary(lm(dat$WD_0_0_1 ~ dat$WD_COSMOS))
WD_lm
##
## Call:
## lm(formula = dat$WD_0_0_1 ~ dat$WD_COSMOS)
##
## Residuals:
## Min 1Q Median 3Q Max
## -325.27 -9.26 3.58 12.42 343.67
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 13.383747 1.127993 11.87 <2e-16 ***
## dat$WD_COSMOS 0.868328 0.004999 173.72 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 49.58 on 15755 degrees of freedom
## (1859 observations deleted due to missingness)
## Multiple R-squared: 0.657, Adjusted R-squared: 0.657
## F-statistic: 3.018e+04 on 1 and 15755 DF, p-value: < 2.2e-16
#Create new variable
dat$WD_filled <- NA
#Fill new variable with EC data where available, data modelled from lm if not
dat$WD_filled <- ifelse(is.na(dat$WD_0_0_1),
(WD_lm$coefficients[1] + WD_lm$coefficients[2] * dat$WD_COSMOS),
(dat$WD_0_0_1))
#graph to check fit is sensible
ggplot(dat, aes(x = Datetime))+
geom_point(aes(y = WD_0_0_1), col = "purple")+
geom_point(aes(y = WD_filled), shape = 3, col = "grey")+
geom_line(aes(y = WD_filled), col = "grey")+
theme_bw()
rm(WD_lm)
#linear model of relationship between EC and COSMOS SWIN
WS_lm <- summary(lm(dat$WS_0_0_1 ~ dat$WS_COSMOS))
WS_lm
##
## Call:
## lm(formula = dat$WS_0_0_1 ~ dat$WS_COSMOS)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5.8187 -0.3452 0.0925 0.5099 3.1680
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.288734 0.011723 -24.63 <2e-16 ***
## dat$WS_COSMOS 0.841713 0.003237 260.04 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.7784 on 15755 degrees of freedom
## (1859 observations deleted due to missingness)
## Multiple R-squared: 0.811, Adjusted R-squared: 0.811
## F-statistic: 6.762e+04 on 1 and 15755 DF, p-value: < 2.2e-16
#Create new variable
dat$WS_filled <- NA
#Fill new variable with EC data where available, data modelled from lm if not
dat$WS_filled <- ifelse(is.na(dat$WS_0_0_1),
(WS_lm$coefficients[1] + WS_lm$coefficients[2] * dat$WS_COSMOS),
(dat$WS_0_0_1))
#graph to check fit is sensible
ggplot(dat, aes(x = Datetime))+
geom_point(aes(y = WS_0_0_1), col = "purple")+
geom_point(aes(y = WS_filled), shape = 3, col = "grey")+
geom_line(aes(y = WS_filled), col = "grey")+
theme_bw()
rm(WS_lm)
H and Le have been gapfilled using ERA data, so there should be few if any gaps in the new ‘filled’ variables.
#count nas after removing fokens
h_gapfill <- round((sum(is.na(dat$H_filled)) )/ length(dat$H_filled) * 100,1) - nas$Inital_missing[3]
le_gapfill <- round((sum(is.na(dat$LE_filled)) )/ length(dat$LE_filled) * 100,1) - nas$Inital_missing[4]
gapfilled <- data.frame(h_gapfill, le_gapfill)
gapfilled <- melt(gapfilled)
nas <- cbind(nas, c(NA, NA, gapfilled[,2], NA))
colnames(nas)[4] <- "gapfilled"
#remove everything except dat and the list of missing values
rm(list = setdiff(ls(), c("dat","nas")))
#check if we've managed to duplicate anything
dat <- dat[!duplicated(dat$Datetime, fromLast = T),]
When atmospheric conditions are very stable, particularly at night, there can be insufficient turbulence for the assumptions of eddy covariance to hold. Ustar is a measure of turbulent flow. NEE should be independent of ustar; but during times of low turbulence it may not be. The ustar threshold is the ustar value above which NEE is no longer dependent on ustar, and hence when fluxes can be considered valid.
We need a full year of data for this, ideally. For the future, if we find the threshold stays the same we don’t need to do this step; just use the value found previously for that field.
#Eddy cov processing package
library(REddyProc)
#Just take the variables we need
ds <- dat
colnames(ds)[which(names(ds) == "USTAR_0_0_1")] <- "Ustar"
colnames(ds)[which(names(ds) == "FC_1_1_1")] <- "NEE"
colnames(ds)[which(names(ds) == "TA_filled")] <- "Tair"
colnames(ds)[which(names(ds) == "SWIN_filled")] <- "Rg"
colnames(ds)[which(names(ds) == "Datetime")] <- "DateTime"
colnames(ds)[which(names(ds) == "VPD_filled")] <- "VPD"
ds <- ds[!duplicated(ds$DateTime, fromLast = T),]
ds$Tair <- ds$Tair - 273.15 #EddyProc expects temp in degC
ds$VPD <- ds$VPD * 100 #1 pascal is equal to 0.01 hPa.
#convert data to format needed by REddypro
EProc <- sEddyProc$new('Green',
ds, c('NEE','Rg','Tair','VPD', 'Ustar'))
#Do ustar analysis
Ustar <- EProc$sEstUstarThold(UstarColName = "Ustar", NEEColName = "NEE",
TempColName = "Tair", RgColName = "Rg",
ctrlUstarSub = usControlUstarSubsetting(taClasses = 7, UstarClasses = 20,
swThr = 10, minRecordsWithinTemp = 50,
minRecordsWithinSeason = 160,
minRecordsWithinYear = 3000,
isUsingOneBigSeasonOnFewRecords = TRUE))
Ustar
## aggregationMode seasonYear season uStar
## 1 single NA <NA> 0.2365516
## 2 year 2020 <NA> 0.2365516
## 3 season 2020 2020001 0.2365516
## 4 season 2020 2020003 0.1495611
## 5 season 2020 2020006 0.1953585
## 6 season 2020 2020009 0.1448318
## 7 season 2020 2020012 0.1069071
UstarThreshold <- Ustar$uStar[Ustar$aggregationMode == 'single']
#Plots NEE vs Ustar to a separate file
EProc$sPlotNEEVersusUStarForSeason()
REddyproc likes to save images to file rather than print directly to the screen. So I’ve got it here
library(knitr)
###CHANGE THIS!!!###############################################################################################
################################################################################################################
###############################################################################################################
include_graphics("W:/Documents/EC Advanced QC/plots/Green_20-21_NEEvsUStar_2020006_none.pdf")
Now we know the turbulence needed for fluxes to be valid, we can remove any that fall below.
#remove fluxes when ustar is lower than measured threshold
dat$FC_1_1_1[dat$USTAR_0_0_1 < UstarThreshold] <- NA
dat$FCH4_1_1_1[dat$USTAR_0_0_1 < UstarThreshold] <- NA
dat$H_filled[dat$USTAR_0_0_1 < UstarThreshold] <- NA
dat$LE_filled[dat$USTAR_0_0_1 < UstarThreshold] <- NA
dat$TAU_1_1_1[dat$USTAR_0_0_1 < UstarThreshold] <- NA
#count nas after removing fokens
co2_ustar <- round((sum(is.na(dat$FC_1_1_1)) )/ length(dat$FC_1_1_1) * 100,1) - nas$Inital_missing[1] - nas$foken[1]
ch4_ustar <- round((sum(is.na(dat$FCH4_1_1_1)))/ length(dat$FCH4_1_1_1)* 100,1) - nas$Inital_missing[2]- nas$foken[2]
h_ustar <- round((sum(is.na(dat$H_filled)) )/ length(dat$H_1_1_1) * 100,1) - nas$Inital_missing[3]- nas$foken[3]
le_ustar <- round((sum(is.na(dat$LE_filled)) )/ length(dat$LE_1_1_1) * 100,1) - nas$Inital_missing[4]- nas$foken[4]
tau_ustar <- round((sum(is.na(dat$TAU_1_1_1)) )/ length(dat$TAU_1_1_1) * 100,1) - nas$Inital_missing[5]- nas$foken[5]
ustar_missing <- data.frame(co2_ustar, ch4_ustar, h_ustar, le_ustar, tau_ustar)
ustar_missing <- melt(ustar_missing)
nas <- cbind(nas, ustar_missing[,2])
colnames(nas)[5] <- "ustar"
#remove everything except dat and the list of missing values
rm(list = setdiff(ls(), c("dat","nas", "UstarThreshold")))
nas
## variable Inital_missing foken gapfilled ustar
## 1 co2_na 21.5 5.9 NA 31.3
## 2 ch4_na 58.8 5.8 NA 14.9
## 3 h_na 10.3 7.7 2.0 31.4
## 4 le_na 21.7 0.0 -12.8 26.5
## 5 tau_na 10.3 2.1 NA 37.4
The exact area that the EC tower is measuring depends on wind speed and direction (as well as tower height). We only want to keep data when the majority of the area measured is within the field.The footprint analysis will tell us what percentage of the footprint is within the field, so we can remove timepoints where the EC is actually measuring the road or woods, for example.
library(ggplot2)
library(RColorBrewer)
#just get the columns we need (wind and turbulence variables)
library(dplyr)
dat_fp <- dat %>%
dplyr::select(c("Datetime", "MO_LENGTH_0_0_1", "USTAR_0_0_1", "V_SIGMA_0_0_1", "WD_filled"))
dat_fp$USTAR_0_0_1[dat_fp$USTAR_0_0_1 < UstarThreshold] <- NA
#set up empty data frame for output
fp_output <- data.frame()
field_output <- data.frame()
#set up empty list for plots
plot_list = list()
for(i in 1:nrow(dat_fp)) {
row <- dat_fp[i,]
#where data is available, run footprint
ifelse (sum(is.na(row)) == 0, source("X:/Advanced analysis/2021/6_Processing manuals/FFP/FFP_for_loop_Green.R"), c(sum_fp <- NA,sum_field <- NA))
#footprint script outputs an estimate of total percentage of measured area falling within field. Put in dataframe.
x <- data.frame(row$Datetime, sum_fp)
a <- data.frame(row$Datetime, sum_field)
field_output <- rbind(field_output, x)
fp_output <- rbind(fp_output, a)
#print a selection of footprints
if (i %% 2000 == 0) {
p = ggplot(footprint, aes( x = Easting, y = Northing))+
geom_point(aes(col = Field))+
scale_color_gradient(low = "grey", high = "white")+
geom_tile(aes(fill = Footprint), alpha = 0.7)+
scale_fill_distiller(palette = "YlGnBu", direction = 1)+
theme_bw()
ggsave("ffp_plot.png", p)
plot_list[[i]] = p
}
}
plot_list <- purrr::compact(plot_list)
library(ggpubr)
plots <- ggarrange(plotlist = plot_list, common.legend = TRUE)
ggsave("ffp_plots.png", plots)
A few examples of where the flux is coming from on the field
plots
#merge output from footprint too with main dataframe
colnames(fp_output)[1] <- "Datetime"
colnames(fp_output)[2] <- "Footprint"
dat <- merge(dat, fp_output, by = "Datetime")
#merge output from field percent with main dataframe also
colnames(field_output) <- c("Datetime", "Percent_of_Field")
dat <- merge(dat, field_output, by = "Datetime")
#here removing any fluxes where less than 70% of the data comes from outside the field. This can of course be adjusted.
dat$FC_1_1_1[dat$Footprint < 0.7] <- NA
dat$FCH4_1_1_1[dat$Footprint < 0.7] <- NA
dat$H_filled[dat$Footprint < 0.7] <- NA
dat$LE_filled[dat$Footprint < 0.7] <- NA
dat$TAU_1_1_1[dat$Footprint < 0.7] <- NA
co2_fp <- round((sum(is.na(dat$FC_1_1_1)) )/ length(dat$FC_1_1_1) * 100,1) - nas$Inital_missing[1] - nas$ustar[1]
ch4_fp <- round((sum(is.na(dat$FCH4_1_1_1)))/ length(dat$FCH4_1_1_1)* 100,1) - nas$Inital_missing[2]- nas$ustar[2]
h_fp <- round((sum(is.na(dat$H_filled)) )/ length(dat$H_1_1_1) * 100,1) - nas$Inital_missing[3]- nas$ustar[3]
le_fp <- round((sum(is.na(dat$LE_filled)) )/ length(dat$LE_1_1_1) * 100,1) - nas$Inital_missing[4]- nas$ustar[4]
tau_fp <- round((sum(is.na(dat$TAU_1_1_1)) )/ length(dat$TAU_1_1_1) * 100,1) - nas$Inital_missing[5]- nas$ustar[5]
fp_missing <- data.frame(co2_fp, ch4_fp, h_fp, le_fp, tau_fp)
fp_missing <- melt(fp_missing)
nas <- cbind(nas, fp_missing[,2])
colnames(nas)[6] <- "footprint"
#remove everything except dat and the list of missing values
rm(list = setdiff(ls(), c("dat","nas", "UstarThreshold")))
nas
## variable Inital_missing foken gapfilled ustar footprint
## 1 co2_na 21.5 5.9 NA 31.3 18.5
## 2 ch4_na 58.8 5.8 NA 14.9 13.5
## 3 h_na 10.3 7.7 2.0 31.4 21.4
## 4 le_na 21.7 0.0 -12.8 26.5 14.0
## 5 tau_na 10.3 2.1 NA 37.4 16.1
Usually around 50% of CO2 data. If too much has been removed, we could decide to allow data with lower footprint contribution or ustar values, for example.
nas$total_to_gapfill <- rowSums(nas[,2:6], na.rm = T)
Sections 3 and 4 gave us a complete, QCed meteorological dataset. Sections 2, 5 and 6 removed any low quality flux data. Now we can fill the gaps this left using the complete meteorological data.
This is adapted from this guidance https://rdrr.io/rforge/REddyProc/man/sEddyProc.example.html
library(REddyProc)
ds <- dat
colnames(ds)[which(names(ds) == "USTAR_0_0_1")] <- "Ustar"
colnames(ds)[which(names(ds) == "FC_1_1_1")] <- "NEE"
colnames(ds)[which(names(ds) == "TA_filled")] <- "Tair"
colnames(ds)[which(names(ds) == "SWIN_filled")] <- "Rg"
colnames(ds)[which(names(ds) == "Datetime")] <- "DateTime"
colnames(ds)[which(names(ds) == "VPD_filled")] <- "VPD"
#ds <- ds[!duplicated(ds$DateTime, fromLast = T),]
ds$Tair <- ds$Tair - 273.15 #EddyProc expects temp in degC
ds$VPD <- ds$VPD * 100 #1 pascal is equal to 0.01 hPa.
as.numeric.factor <- function(x) {as.numeric(levels(x))[x]}
ds$FC_SSITC_TEST_1_1_1 <- as.numeric.factor(ds$FC_SSITC_TEST_1_1_1)
EProc <- sEddyProc$new('Green',
ds, c('NEE','Rg','Tair','VPD', 'Ustar', 'FC_SSITC_TEST_1_1_1'))
#Initialize 'NEE' as variable to fill
EProc$sFillInit('NEE')
# Set variables and tolerance intervals
# twutz: \u00B1 is the unicode for '+over-'
V1.s='Rg'; T1.n=50 # Global radiation 'Rg' within \u00B150 W m-2
V2.s='VPD'; T2.n=5 # Vapour pressure deficit 'VPD' within 5 hPa
V3.s='Tair'; T3.n=2.5 # Air temperature 'Tair' within \u00B12.5 degC
# Step 1: Look-up table with window size \u00B17 days
Result_Step1.F <- EProc$sFillLUT(7, V1.s, T1.n, V2.s, T2.n, V3.s, T3.n)
# Step 2: Look-up table with window size \u00B114 days
Result_Step2.F <- EProc$sFillLUT(14, V1.s, T1.n, V2.s, T2.n, V3.s, T3.n)
# Step 3: Look-up table with window size \u00B17 days, Rg only
Result_Step3.F <- EProc$sFillLUT(7, V1.s, T1.n)
# Step 4: Mean diurnal course with window size 0 (same day)
Result_Step4.F <- EProc$sFillMDC(0)
# Step 5: Mean diurnal course with window size \u00B11, \u00B12 days
Result_Step5a.F <- EProc$sFillMDC(1)
Result_Step5b.F <- EProc$sFillMDC(2)
# Step 6: Look-up table with window size \u00B121, \u00B128, ..., \u00B170
for( WinDays.i in seq(21,70,7) ) Result_Step6.F <- EProc$sFillLUT(WinDays.i, V1.s, T1.n, V2.s, T2.n, V3.s, T3.n)
# Step 7: Look-up table with window size \u00B114, \u00B121, ..., \u00B170, Rg only
for( WinDays.i in seq(14,70,7) ) Result_Step7.F <- EProc$sFillLUT(WinDays.i, V1.s, T1.n)
# Step 8: Mean diurnal course with window size \u00B17, \u00B114, ..., \u00B1210 days
for( WinDays.i in seq(7,210,7) ) Result_Step8.F <- EProc$sFillMDC(WinDays.i)
# Export results, columns are named 'VAR_'
FilledEddyData.F <- EProc$sExportResults()
library(lubridate)
dat <- cbind(dat, FilledEddyData.F)
dat$Time <- as.POSIXct(strftime(dat$Datetime, format="%H:%M:%S", tz = "UTC"), format="%H:%M:%S", tz = "UTC")
dat$Date<-as.Date(dat$Datetime)
datmonth<-month(ymd(dat$Date),label=T)
colnames(dat)[which(names(dat) == "VAR_f")] <- "FC_filled"
rm(list = setdiff(ls(), c("dat","nas", "EProc", "ds")))
Take a few rows of data out and repeat, see how it compares to the original to indicate how good the gap-filling of true missing data is. May need to work on this, at the moment I’ve randomly removed ~ 10% of the data and filled, but really we should randomly remove some data where larger chunks of time are missing (hours, days, weeks) as it will likely affect performance.
ds <- dat
ds$FC_performance <- as.numeric(apply (ds[,c("FC_1_1_1"),drop=F], 2, function(x) {x[sample( c(1:nrow(ds)), 1750)] <- NA; x} ))
colnames(ds)[which(names(ds) == "USTAR_0_0_1")] <- "Ustar"
colnames(ds)[which(names(ds) == "FC_performance")] <- "NEE"
colnames(ds)[which(names(ds) == "TA_filled")] <- "Tair"
colnames(ds)[which(names(ds) == "SWIN_filled")] <- "Rg"
colnames(ds)[which(names(ds) == "Datetime")] <- "DateTime"
colnames(ds)[which(names(ds) == "VPD_filled")] <- "VPD"
ds$Tair <- ds$Tair - 273.15 #EddyProc expects temp in degC
ds$VPD <- ds$VPD * 100 #1 pascal is equal to 0.01 hPa.
EProc_test <- sEddyProc$new('Green',
ds, c('NEE','Rg','Tair','VPD', 'Ustar'))
#Initialize 'NEE' as variable to fill
EProc_test$sFillInit('NEE')
# Set variables and tolerance intervals
# twutz: \u00B1 is the unicode for '+over-'
V1.s='Rg'; T1.n=50 # Global radiation 'Rg' within \u00B150 W m-2
V2.s='VPD'; T2.n=5 # Vapour pressure deficit 'VPD' within 5 hPa
V3.s='Tair'; T3.n=2.5 # Air temperature 'Tair' within \u00B12.5 degC
# Step 1: Look-up table with window size \u00B17 days
Result_Step1.F <- EProc_test$sFillLUT(7, V1.s, T1.n, V2.s, T2.n, V3.s, T3.n)
# Step 2: Look-up table with window size \u00B114 days
Result_Step2.F <- EProc_test$sFillLUT(14, V1.s, T1.n, V2.s, T2.n, V3.s, T3.n)
# Step 3: Look-up table with window size \u00B17 days, Rg only
Result_Step3.F <- EProc_test$sFillLUT(7, V1.s, T1.n)
# Step 4: Mean diurnal course with window size 0 (same day)
Result_Step4.F <- EProc_test$sFillMDC(0)
# Step 5: Mean diurnal course with window size \u00B11, \u00B12 days
Result_Step5a.F <- EProc_test$sFillMDC(1)
Result_Step5b.F <- EProc_test$sFillMDC(2)
# Step 6: Look-up table with window size \u00B121, \u00B128, ..., \u00B170
for( WinDays.i in seq(21,70,7) ) Result_Step6.F <- EProc_test$sFillLUT(WinDays.i, V1.s, T1.n, V2.s, T2.n, V3.s, T3.n)
# Step 7: Look-up table with window size \u00B114, \u00B121, ..., \u00B170, Rg only
for( WinDays.i in seq(14,70,7) ) Result_Step7.F <- EProc_test$sFillLUT(WinDays.i, V1.s, T1.n)
# Step 8: Mean diurnal course with window size \u00B17, \u00B114, ..., \u00B1210 days
for( WinDays.i in seq(7,210,7) ) Result_Step8.F <- EProc_test$sFillMDC(WinDays.i)
# Export results, columns are named 'VAR_'
FilledEddyData.performance <- EProc_test$sExportResults()
ds$FC_filled_performance <- FilledEddyData.performance$VAR_f
ds$is_performance <- 0
ds$is_performance[!is.na(ds$FC_1_1_1) & is.na(ds$NEE)] <- 1
ds_performance <- ds[ds$is_performance == 1,]
date_breaks <- seq(min(ds_performance$Time), max(ds_performance$Time), "4 hour")
date_labels <- format(date_breaks, "%H:%M")
lo <- ds_performance %>%
summarise_at(vars(c(FC_1_1_1, FC_filled_performance)), min) %>%
min()
hi <- ds_performance %>%
summarise_at(vars(c(FC_1_1_1, FC_filled_performance)), max) %>%
max()
ggplot(ds_performance, aes(x = FC_1_1_1, y = FC_filled_performance, color = as.numeric(Time)))+
geom_point()+
scale_colour_gradientn("Time",colours = terrain.colors(12),
breaks = as.numeric(date_breaks),
labels = date_labels)+
theme_bw()+
labs(x = "Original CO2 Flux", y = "Artifical gaps, filled for performance check")+
coord_fixed(ratio = 1, xlim = c(lo,hi), ylim = c(lo,hi))
rm(list = setdiff(ls(), c("dat","nas", "ds", "EProc")))
Lot’s of infomation on these graphs: you can see the yearly and daily patterns of CO2 flux.
p1 <- ggplot(data = dat, aes(x = Time,y = Date)) +
geom_tile(aes(fill = FC_1_1_1)) +
scale_x_datetime(date_labels = "%H:%M",expand=c(0,0))+
scale_y_date(expand = c(0,0))+
scale_fill_gradient2(na.value="lightyellow",
limits = c(-40,40), name = "CO2 Flux")+
theme_bw()+
theme(axis.text.x = element_text(size=20))+
theme(axis.text.y = element_text(size=20))+
theme(axis.title.x = element_text(size=20))+
theme(axis.title.y = element_text(size=20))+
theme(legend.text = element_text(size=20))+
labs(caption = expression(paste("CO" [2]," Flux, ??mol s"^"-1","m"^"-2")))+
ggtitle("Burrows 2020")+
theme(text = element_text(size = 20))
p2 <- ggplot(data = dat, aes(x = Time,y = Date)) +
geom_tile(aes(fill = FC_filled)) +
scale_x_datetime(date_labels = "%H:%M",expand=c(0,0))+
scale_y_date(expand = c(0,0))+
scale_fill_gradient2(na.value="lightyellow",
limits = c(-40,40), name = "CO2 Flux")+
theme_bw()+
theme(axis.text.x = element_text(size=20))+
theme(axis.text.y = element_text(size=20))+
theme(axis.title.x = element_text(size=20))+
theme(axis.title.y = element_text(size=20))+
theme(legend.text = element_text(size=20))+
labs(caption = expression(paste("CO" [2]," Flux, ??mol s"^"-1","m"^"-2")))+
ggtitle("Burrows 2020")+
theme(text = element_text(size = 20))
library(cowplot)
plot_grid(p1,p2)
To check that the gap-filling has preserved the day-night pattern
dat_time <- dat %>%
dplyr::select((c(FC_1_1_1, FC_filled, Time))) %>%
group_by(Time) %>%
mutate(Flux = mean(FC_1_1_1, na.rm = T), Flux_filled = mean(FC_filled))
Method <- c("Flux" = "green", "Filled Flux" = "orange")
ggplot(dat_time, aes(x = Time))+
geom_point(aes(y = Flux, col = "Flux"), size = 5)+
geom_point(aes(y = Flux_filled, col = "Filled Flux"), pch = 1, size = 5)+
theme_bw()+
scale_color_manual(values = Method)+
labs(color = "Method",
y = "CO2 umol/m2/s")
CO2 flux is the NET flux, with contributions from both respiration (positive flux from field to atmosphere) and photosynthesis (negative flux). We can assume that 100% of contributions during the nighttime are from respiration, then use the relationship between temperature and nighttime respiration to estimate the daytime respiration, and so also estimate GPP.
#Green location Lon = -3.90613, Lat = 50.7698
#Blue location Lon = -3.89857, Lat = 50.7669
#Red Location Lon = -3.90513, Lat = 50.7739
EProc$sSetLocationInfo(LatDeg = 50.7669, LongDeg = -3.89857, TimeZoneHour = 0)
#TimeZoneHour = Time zone: hours shift to UTC, e.g. 1 for Berlin
EProc$sMDSGapFill('Tair', FillAll = FALSE, minNWarnRunLength = NA)
EProc$sMDSGapFill('VPD', FillAll = FALSE, minNWarnRunLength = NA)
EProc$sSetUStarSeasons(seasonFactor = usCreateSeasonFactorMonth(ds$DateTime))
#EProc$sMRFluxPartitionUStarScens()
EProc$sMRFluxPartition(FluxVar.s = "NEE", QFFluxVar = "FC_SSITC_TEST_1_1_1",TempVar.s = "Tair",
RadVar.s = "Rg")
Total flux each month. Useful to see what months are net positive/negative, and overall yearly pattern with less noise.
dat$Month <- month(dat$Datetime, label = T)
dat$Year <- year(dat$Datetime)
dat$ymon <- paste(dat$Year, dat$Month)
dat$ymon <- factor(dat$ymon, ordered = T, levels = c("2020 Jan", "2020 Feb", "2020 Mar",
"2020 Apr", "2020 May", "2020 Jun",
"2020 Jul", "2020 Aug", "2020 Sep",
"2020 Oct", "2020 Nov", "2020 Dec"))
dat_ymon <- dat %>%
dplyr::select((c(FC_1_1_1, FC_filled, ymon))) %>%
group_by(ymon) %>%
mutate(Flux = sum(FC_1_1_1, na.rm = T), Flux_filled = sum(FC_filled))
ggplot(dat_ymon, aes(x = ymon))+
geom_bar(stat = "identity", aes(y = (Flux_filled*60*30)/1000/1000), col = "orange")+
theme_bw()+
ylab(label = "CO2 mol/m2")
Is the field a sink or source of CO2 for the study period, and what is the total net CO2?
#Total flux for this time period (would normally be full calendar year)
#go from per second to per half-hourly timestep
a <- sum(dat$FC_filled*60*30)
#go from umol to mol
a <- a/1000/1000
#go from mol to grams CO2
a <- a*44.01
#go from m2 to hectare
a <- a*10000
#go from grams to tonnes
a <- a/1000/1000
print(paste(round(a,2), "tonnes CO2/ hectare, total estimate"))
## [1] "-6.57 tonnes CO2/ hectare, total estimate"
ifelse(a > 0,
"Positive flux indicates field is a net CO2 source",
"Negative flux indicates field is a net CO2 sink")
## [1] "Negative flux indicates field is a net CO2 sink"
The difference between energy IN and energy OUT should be around zero. Frequently for EC systems, the energy balance is not ‘closed’, thought to be due to underestimation of sensible heat (H) and latent heat (LE).
#Plot energy OUT (as latent and sensible heat) vs energy IN (net radiation minus soil heat flux)
ggplot(dat, aes(y = H_1_1_1+LE_1_1_1, x = RN_1_1_1 - SHF_mean))+
geom_point()+
coord_fixed(xlim = c(-1000,1000), ylim = c(-1000,1000), ratio = 1)+
theme_bw()+
geom_abline(intercept = 0, slope = 1)
summary(lm((dat$H_1_1_1+dat$LE_1_1_1) ~ (dat$RN_1_1_1-dat$SHF_mean)))
##
## Call:
## lm(formula = (dat$H_1_1_1 + dat$LE_1_1_1) ~ (dat$RN_1_1_1 - dat$SHF_mean))
##
## Residuals:
## Min 1Q Median 3Q Max
## -4700.3 -23.0 -0.5 17.4 3153.4
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.04558 1.22455 -1.67 0.0949 .
## dat$RN_1_1_1 0.70750 0.00733 96.52 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 126.5 on 12563 degrees of freedom
## (5051 observations deleted due to missingness)
## Multiple R-squared: 0.4258, Adjusted R-squared: 0.4258
## F-statistic: 9316 on 1 and 12563 DF, p-value: < 2.2e-16
#slope of 65, indicating 65% closure. This is pretty typical. There is an argument that H and LE should be 'corrected' so that the energy balance is closed. I haven't tried this yet.
dat$Month <- as.factor(month(dat$Datetime, label = T, abb = T))
ggplot(dat, aes(x=Time, y=FC_filled, group=Month, color=Month))+
theme_bw()+
scale_x_datetime(date_labels = "%H:%M",expand=c(0.01,0.01))+
scale_y_continuous(limits=c(-30, 30),
breaks=pretty(x=c(-30,30)))+
labs(y= expression(paste("CO" [2]," Flux, umol s"^"-1","m"^"-2")), color = "Month")+
stat_summary(aes(group=Month),fun=mean,geom="line",size=1)+
scale_color_discrete(h=c(300,0))
#funtion for getting mean value and removing nas
mean.na <- function(x) {
mean(x, na.rm = T)
}
#CH4 flux is in nanomoles, so divided by 100 gives micromoles
ggplot(dat, aes(x=Time, y=FCH4_1_1_1/1000,group=Month,color=Month))+
geom_point(size=0.9)+
theme_bw()+
scale_x_datetime(date_labels = "%H:%M",expand=c(0.01,0.01))+
labs(y= expression(paste("CH" [4]," Flux, umol s"^"-1","m"^"-2")), color = "Month")+
stat_summary(aes(group=Month),fun.y=mean.na,geom="line",size=1)+
scale_color_discrete(h=c(300,0))
# 10 Download data
Download the completed file we have created from the initial NWFP download.
x <- Sys.Date()
field = "Green"
year = "2020"
write.csv(dat, file = paste0("W:/Documents/EC Advanced QC/", field, year, x, ".csv"), row.names = F)