This is an R script of a reproducible example for mass aggregation of scientific data, as described in -
“Robust mass aggregation-based bedload flux prediction in flash flood conditions using an acoustic sensor” by Eran Halfi, Dror Paz, Kyle Stark, Ian Reid, Michael Dorman, Jonathan B. Laronne
Usage: replace the sample CSV file bedload_mass_calibration_example.csv file with a different one, following this structure.
## date weight Hp
## 1 2013-12-11 23:58:00 0.029397 0.0
## 2 2013-12-11 23:58:30 0.271035 0.1
## 3 2013-12-11 23:59:00 0.626709 0.1
## 4 2013-12-11 23:59:30 1.008832 0.1
## 5 2013-12-12 00:00:00 1.303752 0.1
## 6 2013-12-12 00:00:30 1.673890 0.1
library(plyr)
# Read file
dat = read.csv("bedload_mass_calibration_example.csv", skip = 1, stringsAsFactors = FALSE)
head(dat)## Date.time LC.kg LC.change.kg LC.flux..30.s..kg.sm d.cm
## 1 12/11/2013 23:58:00 0.029397 0.0 0.0 56.9
## 2 12/11/2013 23:58:30 0.271035 0.2 0.1 56.7
## 3 12/11/2013 23:59:00 0.626709 0.4 0.1 56.7
## 4 12/11/2013 23:59:30 1.008832 0.4 0.1 56.8
## 5 12/12/2013 00:00:00 1.303752 0.3 0.1 56.7
## 6 12/12/2013 00:00:30 1.673890 0.4 0.1 56.7
## US1.pulses X1kg.sum X1kg.index X X1kg.index.1 X.1 LC.kg.1
## 1 295 0.0 1 NA 1 12/12/2013 0:00 1.30
## 2 255 0.2 1 NA 2 12/12/2013 0:02 2.49
## 3 272 0.6 1 NA 3 12/12/2013 0:04 3.65
## 4 221 1.0 1 NA 4 12/12/2013 0:05 4.66
## 5 230 1.3 1 NA 5 12/12/2013 0:07 5.90
## 6 274 0.4 2 NA 6 12/12/2013 0:08 7.25
## LC.change.kg.1 LC.flux.kg.sm L7.cm US1.pulses.1
## 1 0.00 0 0
## 2 1.19 0.09 0 0
## 3 1.15 0.09 0 0
## 4 1.01 0.10 0 0
## 5 1.25 0.13 0 0
## 6 1.35 0.14 0 0
# Selecting relevant columns
dat = data.frame(
date = dat[, 1],
weight = dat[, 2],
Hp = dat[, 4]
)
head(dat)## date weight Hp
## 1 12/11/2013 23:58:00 0.029397 0.0
## 2 12/11/2013 23:58:30 0.271035 0.1
## 3 12/11/2013 23:59:00 0.626709 0.1
## 4 12/11/2013 23:59:30 1.008832 0.1
## 5 12/12/2013 00:00:00 1.303752 0.1
## 6 12/12/2013 00:00:30 1.673890 0.1
# Filtering empty incomplete rows
dat = dat[complete.cases(dat), ]
# Formatting date-time
dat$date = as.POSIXlt(paste0(dat$date, ".00"), format = "%m/%d/%Y %H:%M:%S")
dat$date = as.POSIXct(dat$date)# Set threshold
threshold = 4
# Set ID for each period where cumulative weight surpasses threshold
dat$flag = c(FALSE, diff(dat$weight %% threshold) < 0)
dat$id = c(0, cumsum(dat$flag)[-nrow(dat)])
head(dat, 30)## date weight Hp flag id
## 1 2013-12-11 23:58:00 0.029397 0.0 FALSE 0
## 2 2013-12-11 23:58:30 0.271035 0.1 FALSE 0
## 3 2013-12-11 23:59:00 0.626709 0.1 FALSE 0
## 4 2013-12-11 23:59:30 1.008832 0.1 FALSE 0
## 5 2013-12-12 00:00:00 1.303752 0.1 FALSE 0
## 6 2013-12-12 00:00:30 1.673890 0.1 FALSE 0
## 7 2013-12-12 00:01:00 1.922267 0.1 FALSE 0
## 8 2013-12-12 00:01:30 2.265886 0.1 FALSE 0
## 9 2013-12-12 00:02:00 2.494657 0.1 FALSE 0
## 10 2013-12-12 00:02:30 2.740011 0.1 FALSE 0
## 11 2013-12-12 00:03:00 3.039252 0.1 FALSE 0
## 12 2013-12-12 00:03:30 3.407551 0.1 FALSE 0
## 13 2013-12-12 00:04:00 3.648546 0.1 FALSE 0
## 14 2013-12-12 00:04:30 3.911977 0.1 FALSE 0
## 15 2013-12-12 00:05:00 4.343797 0.1 TRUE 0
## 16 2013-12-12 00:05:30 4.656618 0.1 FALSE 1
## 17 2013-12-12 00:06:00 5.123615 0.1 FALSE 1
## 18 2013-12-12 00:06:30 5.425759 0.1 FALSE 1
## 19 2013-12-12 00:07:00 5.901873 0.1 FALSE 1
## 20 2013-12-12 00:07:30 6.379673 0.1 FALSE 1
## 21 2013-12-12 00:08:00 6.821696 0.1 FALSE 1
## 22 2013-12-12 00:08:30 7.249256 0.1 FALSE 1
## 23 2013-12-12 00:09:00 7.789042 0.2 FALSE 1
## 24 2013-12-12 00:09:30 8.338421 0.2 TRUE 1
## 25 2013-12-12 00:10:00 8.726854 0.1 FALSE 2
## 26 2013-12-12 00:10:30 9.279896 0.2 FALSE 2
## 27 2013-12-12 00:11:00 9.754594 0.1 FALSE 2
## 28 2013-12-12 00:11:30 10.426149 0.2 FALSE 2
## 29 2013-12-12 00:12:00 10.995783 0.2 FALSE 2
## 30 2013-12-12 00:12:30 11.565582 0.2 FALSE 2
# Get cumulative weight and summed Hp per ID
dat = ddply(
dat,
"id",
summarise,
weight = weight[length(weight)], # Last value
Hp = sum(Hp) # Sum
)
head(dat, 30)## id weight Hp
## 1 0 4.343797 1.4
## 2 1 8.338421 1.1
## 3 2 12.157223 1.2
## 4 3 16.055236 1.2
## 5 4 20.282656 1.4
## 6 5 24.394645 1.0
## 7 6 28.290765 1.3
## 8 7 32.094301 1.3
## 9 8 36.062811 0.9
## 10 9 40.179402 0.7
## 11 10 44.064843 1.1
## 12 11 48.172683 1.6
## 13 12 52.020630 1.0
## 14 13 56.017851 1.3
## 15 14 60.145000 1.2
## 16 15 64.116543 1.1
## 17 16 68.437267 1.4
## 18 17 72.371511 1.0
## 19 18 76.597305 1.2
## 20 19 80.062430 1.0
## 21 20 84.463797 1.4
## 22 21 88.137865 1.1
## 23 22 92.575899 1.3
## 24 23 97.197977 1.5
## 25 24 101.394744 1.3
## 26 25 105.623335 1.3
## 27 26 109.106360 1.0
## 28 27 112.066772 0.8
## 29 28 116.257419 1.3
## 30 29 120.323943 1.2