Polynomials and Splines

References

Greenland S. Dose-response and trend analysis in epidemiology: alternatives to categorical analysis. http://www.ncbi.nlm.nih.gov/pubmed/7548341

Load packages

## Load datasets package
library(datasets)
## Load ggplot2 for plotting
library(ggplot2)
## Load gridExtra for fine tuning
library(gridExtra)

Load Old Faithful Geyser Data

Description:
     Waiting time between eruptions and the duration of the eruption
     for the Old Faithful geyser in Yellowstone National Park, Wyoming,
     USA.
Format:
     A data frame with 272 observations on 2 variables.
       [,1]  eruptions  numeric  Eruption time in mins
       [,2]  waiting    numeric  Waiting time to next eruption (in mins)

## Load
data(faithful)
## Modify dataset
faithful <- within(faithful, {
    ## Rename for convenience
    x         <- eruptions
    y         <- waiting
    ## Category indicator variable
    indicator <- cut(x, breaks = c(-Inf,2.5,3.5,4.5,Inf), labels = c("i1","i2","i3","i4"))
    ## Variables for spline
    s1        <- pmax(x - 1.5, 0)
    s2        <- pmax(x - 2.5, 0)
    s3        <- pmax(x - 3.5, 0)
    s4        <- pmax(x - 4.5, 0)
})
## Plot raw data as a scatter plot
ggplot(data = faithful,
       mapping = aes(x = x, y = y)) +
    layer(geom = "point") +
    theme_bw() +
    theme(legend.key = element_blank())

plot of chunk unnamed-chunk-3

Define functions for plotting and prediction

## Define a helper function for ggplot2
stat_fun <- function(fun, args, mapping) {
    stat_function(fun = fun, args = args, mapping = mapping, n = 1001, size = 1.5, alpha = 1)
}

## Define prediction function
PredFun <- function(x, model) {
    ## Category indicator variable
    indicator <- cut(x, breaks = c(-Inf,2.5,3.5,4.5,Inf), labels = c("i1","i2","i3","i4"))
    ## Variables for spline
    s1 <- pmax(x - 1.5, 0)
    s2 <- pmax(x - 2.5, 0)
    s3 <- pmax(x - 3.5, 0)
    s4 <- pmax(x - 4.5, 0)
    ## Predict
    predict(object = model, newdata = data.frame(x = x, indicator = indicator, s1 = s1, s2 = s2, s3 = s3, s4 = s4))
}

Polynomial models

Higher order polynomials and fractional polynomials make the dose response relationship more flexible.

## Linear model
lmLinear <- lm(formula = y ~ x,
               data    = faithful)
## Quadratic model
lmQuadratic <- lm(formula = y ~ x + I(x^2),
                  data    = faithful)
## Cubit model
lmCubic <- lm(formula = y ~ x + I(x^2) + I(x^3),
              data    = faithful)
## Fractional polynomial model
lmFractional <- lm(formula = y ~ x + I(x^2) + I(x^(1/2)),
                   data    = faithful)

## Create plot base
plotBasePoly <- ggplot(data = faithful,
       mapping = aes(x = x, y = y)) +
    layer(geom = "point") +
    scale_color_discrete(name = "model", breaks =  c("Linear","Quadratic","Cubic","Fractional")) +
    theme_bw() +
    theme(legend.key = element_blank())

## Create layers
layerPolyLinear     <- stat_fun(fun = PredFun, args = list(model = lmLinear),     mapping = aes(color = "Linear"))
layerPolyQuadratic  <- stat_fun(fun = PredFun, args = list(model = lmQuadratic),  mapping = aes(color = "Quadratic"))
layerPolyCubic      <- stat_fun(fun = PredFun, args = list(model = lmCubic),      mapping = aes(color = "Cubic"))
layerPolyFractional <- stat_fun(fun = PredFun, args = list(model = lmFractional), mapping = aes(color = "Fractional"))

## Plot together
plotBasePoly + layerPolyLinear + layerPolyQuadratic + layerPolyCubic + layerPolyFractional

plot of chunk unnamed-chunk-5


## Plot separately
plotPoly1 <- plotBasePoly + layerPolyLinear
plotPoly2 <- plotBasePoly + layerPolyQuadratic
plotPoly3 <- plotBasePoly + layerPolyCubic
plotPoly4 <- plotBasePoly + layerPolyFractional
grid.arrange(plotPoly1,plotPoly2,plotPoly3,plotPoly4)

plot of chunk unnamed-chunk-5

Spline models

The “spline family” models fit category specific models. The simplest case is the category-specific intercept only models, i.e., category indicator model. Category specific slopes result in the linear spline model.

## Category indicator model
lmInterceptSpline <- lm(formula = y ~ indicator,
                        data    = faithful)
## Linear spline model
lmLinearSpline <- lm(formula = y ~ x + s2 + s3 + s4,
                     data    = faithful)
## Quadratic spline model
lmQuadraticSpline <- lm(formula = y ~ x + I(x^2) + I(s2^2) + I(s3^2) + I(s3^2),
                        data    = faithful)
## Cubic spline model
lmCubicSpline <- lm(formula = y ~ x + I(x^2) + I(x^3) + I(s2^3) + I(s3^3) + I(s3^3),
                    data    = faithful)

## Create plot base
plotBaseSpline <- ggplot(data = faithful,
       mapping = aes(x = x, y = y)) +
    layer(geom = "point") +
    scale_color_discrete(name = "model", breaks =  c("Intercept","Linear","Quadratic","Cubic")) +
    theme_bw() +
    theme(legend.key = element_blank())

## Create layers
layerSpIntercept <- stat_fun(fun = PredFun, args = list(model = lmInterceptSpline), mapping = aes(color = "Intercept"))
layerSpLinear    <- stat_fun(fun = PredFun, args = list(model = lmLinearSpline),    mapping = aes(color = "Linear"))
layerSpQuadratic <- stat_fun(fun = PredFun, args = list(model = lmQuadraticSpline), mapping = aes(color = "Quadratic"))
layerSpCubic     <- stat_fun(fun = PredFun, args = list(model = lmCubicSpline),     mapping = aes(color = "Cubic"))

## Plot together
plotBaseSpline + layerSpIntercept + layerSpLinear + layerSpQuadratic + layerSpCubic

plot of chunk unnamed-chunk-6


## Plot separately
plotSpline1 <- plotBaseSpline + layerSpIntercept
plotSpline2 <- plotBaseSpline + layerSpLinear
plotSpline3 <- plotBaseSpline + layerSpQuadratic
plotSpline4 <- plotBaseSpline + layerSpCubic
grid.arrange(plotSpline1,plotSpline2,plotSpline3,plotSpline4)

plot of chunk unnamed-chunk-6

Table of Contents: http://rpubs.com/kaz_yos/toc
Table of Contents (Japanese): http://rpubs.com/kaz_yos/toc-j
Top Page: http://rpubs.com/kaz_yos/
SlideShare: http://www.slideshare.net/kaz_yos