times <-seq(-pi, pi, length.out =20)x1 <-tibble(ts = times, value =sin(ts), which ="s1")x2 <-tibble(ts = times, value =sin(ts) +0.3, which ="s2")x <-bind_rows(x1, x2)ggplot(x, aes(ts, value, color = which)) +geom_point() +geom_line()
The idea of GF_MAC is to use their mean annual cycle (visualized here by the gray dashed line) as a gap-filling guide:
Code
x %>%group_by(ts) %>%summarise(value =mean(value)) -> x_meanlast_plot() +geom_line(data = x_mean, color ="grey", linetype =2)
Insert a gap in the s1 sensor:
Code
x$value[9:11] <-NAggplot(x, aes(ts, value, color = which)) +geom_point(na.rm =TRUE) +geom_line() +geom_line(data = x_mean, color ="grey", linetype =2)
The last s1 value before the gap is -0.736 and the corresponding MAC value is -0.586; the values on the right side of the gap are 0.476 and 0.626, respectively. This gives us the scaling information we need to fill the gap using the MAC.
fill_gap <-function(x, gap_begin, gap_end, mac) {stopifnot(gap_end >= gap_begin)stopifnot(gap_begin >0&& gap_begin <=length(x))stopifnot(gap_end >0&& gap_end <=length(x))stopifnot(length(x) ==length(mac))# x and mac are numeric vectors of equal length# gap_begin and gap_end are the indices of the first and last NAs of the gapif(gap_begin >1) { left_adjust <- x[gap_begin -1] - mac[gap_begin -1] } else { left_adjust <-0 }#message("left_adjust ", left_adjust)if(gap_end <length(x)) { right_adjust <- x[gap_end +1] - mac[gap_end +1] } else { right_adjust <-0 }#message("right_adjust ", right_adjust)# Calculate adjustments for the gap points plus the pre- and post-gap# good data points that make up the calculation of left_adjust and right_adjust adj <-seq(left_adjust, right_adjust, length.out = gap_end - gap_begin +3)# ...but then drop those out-of-gap adjustment values adj <- adj[c(-1, -length(adj))]# Return the adjusted mean annual cycle values to fill the gap mac[gap_begin:gap_end] + adj}find_gaps <-function(x) { rle_x <-rle(is.na(x))# Compute endpoints of run end <-cumsum(rle_x$lengths) start <-c(1, lag(end)[-1] +1)data.frame(start, end)[rle_x$values,] # return only TRUE value gaps}fill_all_gaps <-function(x, mac) {stopifnot(length(x) ==length(mac)) gaps <-find_gaps(x) gapfilled <-rep(NA_real_, length(x))for(i inseq_len(nrow(gaps))) { thisgap <- gaps$start[i]:gaps$end[i] gapfilled[thisgap] <-fill_gap(x, gaps$start[i], gaps$end[i], mac)# just to make the graphs look good, return the boundary points tooif(gaps$start[i] >1) gapfilled[gaps$start[i] -1] <- x[gaps$start[i] -1]if(gaps$end[i] <length(x)) gapfilled[gaps$end[i] +1] <- x[gaps$end[i] +1] }return(gapfilled)}
On the left, the gap filling converges back towards the MAC because there’s no left-hand s1 data point to use. (We could change this behavior and say we’ll maintain a constant adjustment.)
On the right, if there’s no MAC to guide the algorithm (i.e., we’ve never measured any data at this timestamp), we can’t do any gap filling.
We’re not trying to reconstruct the variability of individual sensors with this method.
A real example
Code
SITE <-"CRC_UP"# change this to e.g. "CRC_W" if you want a particular plotVARIABLE <-"soil-temp-10cm"if(file.exists("GF_MAC_dat.RDS")) {# pre-existing data file dat <-readRDS("GF_MAC_dat.RDS")} else {# This is code used to read the needed L1 data# Assumes that the data are available in data/ regex <-paste0("^", SITE, ".*", VARIABLE, ".*csv$") files <-list.files("../data/L1/", pattern = regex, recursive =TRUE, full.names =TRUE)library(readr) dat <-lapply(files, function(f) {read_csv(f, col_types ="ccTccccdccii") }) dat <-bind_rows(dat) dat$ID <- dat$Instrument <- dat$Instrument_ID <- dat$Location <-NULLlibrary(lubridate) dat$yday <-yday(dat$TIMESTAMP) dat$hour <-hour(dat$TIMESTAMP) dat$minute <-minute(dat$TIMESTAMP) }dat %>%group_by(yday, hour, minute) %>%summarise(mean_value =mean(Value, na.rm =TRUE), .groups ="drop") -> mean_datdat_combined <-left_join(dat, mean_dat, by =c("yday", "hour", "minute"))# Sensor dataggplot(dat_combined, aes(TIMESTAMP, Value, color = Sensor_ID)) +geom_line() +facet_grid(Sensor_ID~.) +ggtitle(paste(SITE, VARIABLE))