To find (almost) the best span for LOESS by cross validation
#----------------------------------------------------------------------#
# fit data points with LOESS + cross validation
#----------------------------------------------------------------------#
library(bootstrap)
loess_wrapper_extrapolate <- function (x, y, span.vals = seq(0.25, 1, by = 0.05), folds = 5){
# Do model selection using mean absolute error, which is more robust than squared error.
mean.abs.error <- numeric(length(span.vals))
# Quantify error for each span, using CV
loess.model <- function(x, y, span){
loess(y ~ x, span = span, control=loess.control(surface="direct"))
}
loess.predict <- function(fit, newdata) {
predict(fit, newdata = newdata)
}
span.index <- 0
for (each.span in span.vals) {
span.index <- span.index + 1
y.hat.cv <- crossval(x, y, theta.fit = loess.model, theta.predict = loess.predict, span = each.span, ngroup = folds)$cv.fit
non.empty.indices <- !is.na(y.hat.cv)
mean.abs.error[span.index] <- mean(abs(y[non.empty.indices] - y.hat.cv[non.empty.indices]))
}
# find the span which minimizes error
best.span <- span.vals[which.min(mean.abs.error)]
# fit and return the best model
best.model <- loess(y ~ x, span = best.span, control=loess.control(surface="direct"))
return(best.model)
}
An example:
x = seq(0, 4, by=0.01)
y = sin(x**2) + x**1/2
plot(x, y)
## loess with default parameter
model.1 <- loess(y ~ x)
lines(x, model.1$fitted, col='blue')
## loess with CV
model.2 <- loess_wrapper_extrapolate(x, y)
lines(x, model.2$fitted, col='red')