library(plyr)
# Sample data: one row per observed (time, variable) pair. Each variable is
# observed over a different time window and its last observed percent is 100.
dat <- data.frame(
  time = c(2, 3, 4, 1, 2, 3, 4, 3, 4, 5),
  percent = c(2, 23, 100, 7, 40, 60, 100, 40, 60, 100),
  variable = rep(c("a", "b", "c"), times = c(3, 4, 3))
)
dat
## time percent variable
## 1 2 2 a
## 2 3 23 a
## 3 4 100 a
## 4 1 7 b
## 5 2 40 b
## 6 3 60 b
## 7 4 100 b
## 8 3 40 c
## 9 4 60 c
## 10 5 100 c
# Expand to fill missing cases: enumerate every (time, variable) combination
# across the observed time range, then left-join the observations onto that
# grid so combinations without data are kept with percent = NA.
# merge() sorts its result on the `by` columns by default, so rows come back
# ordered by variable and then time.
grid <- expand.grid(
  time = seq(from = min(dat$time), to = max(dat$time)),
  variable = unique(dat$variable)
)
dat <- merge(x = grid, y = dat, by = c("variable", "time"), all.x = TRUE)
# Fill with 0 or 100.
# Within each variable, `flag` is TRUE for the leading run of missing percents
# (rows before the first observed value) and FALSE from the first observed
# value onwards. This relies on the rows of each variable being in time order.
dat <- ddply(dat, "variable", function(piece) {
  piece$flag <- cumprod(is.na(piece$percent)) == 1
  piece
})
# Missing values in the leading run become 0; every other missing value
# (anything after the first observation of that variable) becomes 100.
dat$percent <- ifelse(
  is.na(dat$percent),
  ifelse(dat$flag, 0, 100),
  dat$percent
)
# The helper column is no longer needed once the gaps are filled.
dat$flag <- NULL
# Result
dat
## variable time percent
## 1 a 1 0
## 2 a 2 2
## 3 a 3 23
## 4 a 4 100
## 5 a 5 100
## 6 b 1 7
## 7 b 2 40
## 8 b 3 60
## 9 b 4 100
## 10 b 5 100
## 11 c 1 0
## 12 c 2 0
## 13 c 3 40
## 14 c 4 60
## 15 c 5 100