# last observation moved forward

## my first attempt (repeat.before)

``````repeat.before = function(x) {   # repeats the last non NA value. Keeps leading NA
ind = which(!is.na(x))      # get positions of nonmissing values
if(is.na(x[1]))             # if it begins with a missing, add the
ind = c(1,ind)        # first position to the indices
rep(x[ind], times = diff(   # repeat the values at these indices
c(ind, length(x) + 1) )) # diffing the indices + length yields how often
}                               # they need to be repeated``````

## my second attempt (repeat_last)

``````repeat_last = function(x, forward = TRUE, maxgap = Inf, na.rm = FALSE) {   # repeats the last non NA value.
if (!forward) x = rev(x)           # reverse x twice if carrying backward
ind = which(!is.na(x))             # get positions of nonmissing values
if (is.na(x[1]) && !na.rm)         # if it begins with NA
ind = c(1,ind)                           # add first pos
rep_times = diff(                  # diffing the indices + length yields how often
c(ind, length(x) + 1) )          # they need to be repeated
if (maxgap < Inf) {
exceed = rep_times - 1 > maxgap  # exceeding maxgap
if (any(exceed)) {               # any exceed?
ind = sort(c(ind[exceed] + 1, ind))      # add NA following large gaps to indices
rep_times = diff(c(ind, length(x) + 1) ) # diff again
}
}
x = rep(x[ind], times = rep_times) # repeat the values at these indices
if (!forward) x = rev(x)           # second reversion
x
}``````

## na.lomf

``````# replaces all NA values with last non-NA values
na.lomf <- function(x) {

na.lomf.0 <- function(x) {
non.na.idx <- which(!is.na(x))
if (is.na(x[1L])) {
non.na.idx <- c(1L, non.na.idx)
}
rep.int(x[non.na.idx], diff(c(non.na.idx, length(x) + 1L)))
}

dim.len <- length(dim(x))

if (dim.len == 0L) {
na.lomf.0(x)
} else {
apply(x, dim.len, na.lomf.0)
}
}``````

## replaceNAWithLatest

``require(data.table)``
``## Loading required package: data.table``
``````replaceNaWithLatest <- function(
dfIn,
nameColNa = names(dfIn)[1]
){
dtTest <- data.table(dfIn)
setnames(dtTest, nameColNa, "colNa")
dtTest[, segment := cumsum(!is.na(colNa))]
dtTest[, colNa := colNa[1], by = "segment"]
dtTest[, segment := NULL]
setnames(dtTest, "colNa", nameColNa)
return(dtTest)
}``````

## replace_na_with_last

attention: carries forward and backward in case of NAs at beginning

``````replace_na_with_last<-function(x,a=!is.na(x)){
x[which(a)[c(1,1:sum(a))][cumsum(a)+1]]
}``````

## na_locf (Rcpp)

``````library(Rcpp)
cppFunction('IntegerVector na_locf(IntegerVector x) {
int n = x.size();

for(int i = 0; i<n; i++) {
if((i > 0) && (x[i] == NA_INTEGER) & (x[i-1] != NA_INTEGER)) {
x[i] = x[i-1];
}
}
return x;
}')

cppFunction('NumericVector na_locf_numeric(NumericVector x) {
int n = x.size();
LogicalVector ina = is_na(x);

for(int i = 1; i<n; i++) {
if((ina[i] == TRUE) & (ina[i-1] != TRUE)) {
x[i] = x[i-1];
}
}
return x;
}')``````

## tidyr::fill

``tidyr_fill = tidyr:::fillDown``

## edge cases, equality

``````edge_cases = function(x) {
ref = zoo::na.locf(x, na.rm = F)
results = c(
tryCatch(all.equal(ref, repeat.before(x)), error = warning)
,tryCatch(all.equal(ref, na_locf(x)), error = warning)
,tryCatch(all.equal(ref, na_locf_numeric(x)), error = warning)
,tryCatch(all.equal(ref, spacetime::na.locf(x,na.rm=F)), error = warning)
,tryCatch(all.equal(ref, imputeTS::na.locf(x)), error = warning)
,tryCatch(all.equal(ref, na.lomf(x)), error = warning)
,tryCatch(all.equal(ref, replace_na_with_last(x)), error = warning)
,tryCatch(all.equal(ref, tidyr_fill(x)), error = warning)
)
names(results) = c("repeat.before","na_locf", "na_locf_numeric", "spacetime", "imputeTS","na.lomf", "replace_na_with_last","tidyr_fill")
if(all(results == TRUE)) T else results[results!=T]
}

edge_cases(c(NA,1))``````
``````##                                           imputeTS
## "'is.NA' value mismatch: 0 in current 1 in target"
##                               replace_na_with_last
## "'is.NA' value mismatch: 0 in current 1 in target"``````
``edge_cases(c(1, NA))``
``## [1] TRUE``
``edge_cases(c( NA))``
``````##                                 na_locf
##               "Modes: logical, numeric"
##                         na_locf_numeric
## "target is logical, current is numeric"
##                               spacetime
##               "Modes: logical, numeric"
##                                imputeTS
## "target is logical, current is numeric"
##                    replace_na_with_last
##                "Input x is not numeric"``````
``edge_cases(c( NA, T))``
``````##                                            na_locf
##                          "Modes: logical, numeric"
##                                    na_locf_numeric
##            "target is logical, current is numeric"
##                                          spacetime
##                          "Modes: logical, numeric"
##                                           imputeTS
##            "target is logical, current is numeric"
##                               replace_na_with_last
##                           "Input x is not numeric"
##                                               <NA>
## "'is.NA' value mismatch: 0 in current 1 in target"``````
``edge_cases(c( T, NA))``
``````##                                 na_locf
##               "Modes: logical, numeric"
##                         na_locf_numeric
## "target is logical, current is numeric"
##                               spacetime
##               "Modes: logical, numeric"
##                                imputeTS
## "target is logical, current is numeric"
##                    replace_na_with_last
##                "Input x is not numeric"``````
``edge_cases(numeric())``
``## [1] TRUE``
``edge_cases(character())``
``````##                              na_locf                      na_locf_numeric
## "not compatible with requested type" "not compatible with requested type"``````
``edge_cases(c(NA, "1", NA))``
``````##                                            na_locf
##               "not compatible with requested type"
##                                    na_locf_numeric
##               "not compatible with requested type"
##                                           imputeTS
##                           "Input x is not numeric"
##                               replace_na_with_last
## "'is.NA' value mismatch: 0 in current 1 in target"``````
``edge_cases(as.Date(c(NA, "1970-01-01" , NA)))``
``````##                                               imputeTS
##                               "Input x is not numeric"
##                                                na.lomf
##                    "Attributes: < Modes: list, NULL >"
##                                   replace_na_with_last
##                        "Attributes: < Lengths: 1, 0 >"
##                                             tidyr_fill
## "Attributes: < names for target but not for current >"
##                                                   <NA>
##             "Attributes: < current is not list-like >"
##                                                   <NA>
##                   "target is Date, current is numeric"
##                                                   <NA>
##     "'is.NA' value mismatch: 0 in current 1 in target"``````
``edge_cases(c(NA, 100.1000, NA))``
``````##                                            na_locf
##                  "Mean relative difference: 0.001"
##                                           imputeTS
## "'is.NA' value mismatch: 0 in current 1 in target"
##                               replace_na_with_last
## "'is.NA' value mismatch: 0 in current 1 in target"``````

## benchmark

``````x = c(NA,NA,1,NA,NA,NA,NA,NA,NA,NA,NA,2,3,4,NA,NA,NA,NA,NA,5)

library(microbenchmark)
bench_em <- function(x, times = 2000, count = 1000) {
x <- sample(x,count,replace = TRUE)
res = microbenchmark(
na_locf(x),
na.lomf(x),
zoo::na.locf(x, na.rm = F),
spacetime::na.locf(x,na.rm=F),
imputeTS::na.locf(x, na.remaining = "keep"),
repeat.before(x),
repeat_last(x),
tidyr_fill(x),
times = times)
print(res, order = "mean", digits = 1)
ggplot2::autoplot(res)
}
bench_em(x)``````
``````## Unit: microseconds
##                                         expr min  lq mean median  uq   max
##                                   na_locf(x)   6  10   15     12  15  1561
##                                tidyr_fill(x)   6  11   19     14  19  1678
##                             repeat.before(x)  21  28   46     34  44  1940
##                               repeat_last(x)  22  30   50     36  48  2018
##                                   na.lomf(x)  22  30   92     37  47 74631
##                   zoo::na.locf(x, na.rm = F)  75  92  147    112 144  2174
##             spacetime::na.locf(x, na.rm = F)  78  94  159    113 147  2217
##  imputeTS::na.locf(x, na.remaining = "keep") 149 174  260    213 269  2302
##  neval   cld
##   2000 a
##   2000 a
##   2000 ab
##   2000 ab
##   2000  bc
##   2000   cd
##   2000    d
##   2000     e``````