Polynomials and Splines: Fractional polynomials example data set

References

Load packages

## Load ggplot2 for plotting
library(ggplot2)
## Load gridExtra for fine tuning
library(gridExtra)
## Load gam for GAM
library(gam)
## Load mfp for automated fractional polynomials
library(mfp)

Load a hypothetical dataset

NAME:  Fractional polynomials example data set (FPEXAMPLE.DAT)
SIZE:  100 observations, 3 variables.
SOURCE: The data in the file fpexample.dat are used in the first example
        in the paper Hosmer, D.W and Royston, P.R. (2003) 
    "Using Fractional Polynomials to Model Continuous Covariates in 
    Regression Analysis".  
DESCRIPTIVE ABSTRACT:
These data are hypothetical and were computer generated to follow a (-1,-1)
fractional polynomial model.
LIST OF VARIABLES:
Variable    Description
_____________________________________
ID      Identification code
X       Independent variable
Y       Outcome variable
_____________________________________
## Load dataset
fpeg <- read.table("http://www.umass.edu/statdata/statdata/data/fpexample.dat", col.names = c("ID","X","Y"))
## Modify dataset
fpeg <- within(fpeg, {
    ## Change to lower cases
    x <- X
    y <- Y
    ## Category indicator variable
    indicator <- cut(X, breaks = c(-Inf,20,40,60,80,Inf), labels = c("i1","i2","i3","i4","i5"))
    ## Variables for spline
    s1        <- pmax(X -  0, 0)
    s2        <- pmax(X - 20, 0)
    s3        <- pmax(X - 40, 0)
    s4        <- pmax(X - 60, 0)
    s5        <- pmax(X - 80, 0)    
})
## Plot raw data as a scatter plot
## pdf(file = "./20131216DoseResponseGraphs2.pdf", width = 7, height = 7)
ggplot(data = fpeg,
       mapping = aes(x = x, y = y)) +
    layer(geom = "point") +
    scale_x_continuous(breaks = seq(from = 0, to = 100, by = 20)) + 
    theme_bw() +
    theme(legend.key = element_blank())

plot of chunk unnamed-chunk-3

Define functions for plotting and prediction

## Define a helper function for ggplot2
stat_fun <- function(fun, args, mapping) {
    stat_function(fun = fun, args = args, mapping = mapping, n = 1001, size = 1.5, alpha = 1)
}

## Define prediction function
PredFun <- function(x, model) {
    ## Category indicator variable    
    indicator <- cut(x, breaks = c(-Inf,20,40,60,80,Inf), labels = c("i1","i2","i3","i4","i5"))
    ## Variables for spline
    s1        <- pmax(x -  0, 0)
    s2        <- pmax(x - 20, 0)
    s3        <- pmax(x - 40, 0)
    s4        <- pmax(x - 60, 0)
    s5        <- pmax(x - 80, 0)    
    ## Predict
    predict(object = model, newdata = data.frame(x = x,
                                indicator = indicator,
                                s1 = s1, s2 = s2, s3 = s3, s4 = s4, s5 = s5))
}

Polynomial models

Higher order polynomials and fractional polynomials make the dose response relationship more flexible.

## Linear model
lmLinear <- lm(formula = y ~ x,
               data    = fpeg)
## Quadratic model
lmQuadratic <- lm(formula = y ~ x + I(x^2),
                  data    = fpeg)
## Cubit model
lmCubic <- lm(formula = y ~ x + I(x^2) + I(x^3),
              data    = fpeg)
## Fractional polynomial model
lmFractional <- lm(formula = y ~ x + I(x^2) + I(x^(1/2)),
                   data    = fpeg)

## Create plot base
plotBasePoly <- ggplot(data = fpeg,
       mapping = aes(x = x, y = y)) +
    layer(geom = "point") +
    scale_x_continuous(breaks = seq(from = 0, to = 100, by = 20)) +     
    scale_color_discrete(name = "model", breaks =  c("Linear","Quadratic","Cubic","Fractional")) +
    theme_bw() +
    theme(legend.key = element_blank())

## Create layers
layerPolyLinear     <- stat_fun(fun = PredFun, args = list(model = lmLinear),     mapping = aes(color = "Linear"))
layerPolyQuadratic  <- stat_fun(fun = PredFun, args = list(model = lmQuadratic),  mapping = aes(color = "Quadratic"))
layerPolyCubic      <- stat_fun(fun = PredFun, args = list(model = lmCubic),      mapping = aes(color = "Cubic"))
layerPolyFractional <- stat_fun(fun = PredFun, args = list(model = lmFractional), mapping = aes(color = "Fractional"))

## Plot together
plotBasePoly + layerPolyLinear + layerPolyQuadratic + layerPolyCubic + layerPolyFractional

plot of chunk unnamed-chunk-5


## Plot separately
plotPoly1 <- plotBasePoly + layerPolyLinear
plotPoly2 <- plotBasePoly + layerPolyQuadratic
plotPoly3 <- plotBasePoly + layerPolyCubic
plotPoly4 <- plotBasePoly + layerPolyFractional
grid.arrange(plotPoly1,plotPoly2,plotPoly3,plotPoly4)

plot of chunk unnamed-chunk-5

Spline models

The “spline family” models fit category specific models. The simplest case is the category-specific intercept only models, i.e., category indicator model. Category specific slopes result in the linear spline model.

## Category indicator model
lmInterceptSpline <- lm(formula = y ~ indicator,
                        data    = fpeg)
## Linear spline model
lmLinearSpline <- lm(formula = y ~ x + s2 + s3 + s4 + s5,
                     data    = fpeg)
## Quadratic spline model
lmQuadraticSpline <- lm(formula = y ~ x + I(x^2) + I(s2^2) + I(s3^2) + I(s4^2) + I(s5^2),
                        data    = fpeg)
## Cubic spline model
lmCubicSpline <- lm(formula = y ~ x + I(x^2) + I(x^3) + I(s2^3) + I(s3^3) + I(s4^3) + I(s5^3),
                    data    = fpeg)

## Create plot base
plotBaseSpline <- ggplot(data = fpeg,
       mapping = aes(x = x, y = y)) +
    layer(geom = "point") +
    scale_x_continuous(breaks = seq(from = 0, to = 100, by = 20)) +     
    scale_color_discrete(name = "model", breaks =  c("Intercept","Linear","Quadratic","Cubic")) +
    theme_bw() +
    theme(legend.key = element_blank())

## Create layers
layerSpIntercept <- stat_fun(fun = PredFun, args = list(model = lmInterceptSpline), mapping = aes(color = "Intercept"))
layerSpLinear    <- stat_fun(fun = PredFun, args = list(model = lmLinearSpline),    mapping = aes(color = "Linear"))
layerSpQuadratic <- stat_fun(fun = PredFun, args = list(model = lmQuadraticSpline), mapping = aes(color = "Quadratic"))
layerSpCubic     <- stat_fun(fun = PredFun, args = list(model = lmCubicSpline),     mapping = aes(color = "Cubic"))

## Plot together
plotBaseSpline + layerSpIntercept + layerSpLinear + layerSpQuadratic + layerSpCubic

plot of chunk unnamed-chunk-6


## Plot separately
plotSpline1 <- plotBaseSpline + layerSpIntercept
plotSpline2 <- plotBaseSpline + layerSpLinear
plotSpline3 <- plotBaseSpline + layerSpQuadratic
plotSpline4 <- plotBaseSpline + layerSpCubic
grid.arrange(plotSpline1,plotSpline2,plotSpline3,plotSpline4)

plot of chunk unnamed-chunk-6

## dev.off()

Non-parametric model: Generalized additive model (GAM)

## Fit GAM
gamOne <- gam(formula = y ~ s(x),
              data = fpeg)

## Create plot base for GAM
plotBaseGam <- ggplot(data = fpeg,
       mapping = aes(x = x, y = y)) +
    layer(geom = "point") +
    scale_x_continuous(breaks = seq(from = 0, to = 100, by = 20)) +     
    scale_color_discrete(name = "model", breaks =  c("Intercept","Linear","Quadratic","Cubic","GAM")) +
    theme_bw() +
    theme(legend.key = element_blank())

## Create a GAM layer
layerGam1 <- stat_fun(fun = PredFun, args = list(model = gamOne),     mapping = aes(color = "GAM"))

## Show with splines
plotBaseGam + layerSpIntercept + layerSpLinear + layerSpQuadratic + layerGam1

plot of chunk unnamed-chunk-7


## Plot separately
plotGam1 <- plotBaseGam + layerGam1
grid.arrange(plotSpline1,plotSpline2,plotSpline3,plotGam1)

plot of chunk unnamed-chunk-7

Automated fractional polynomial fitting

## Automatically fit fractional polynomials
mfpOne <- mfp(formula = y ~ fp(x, df = 4),
              data = fpeg)
## Check model for transformation
summary(mfpOne)
## 
## Call:
## glm(formula = y ~ I((x/100)^-1) + I((x/100)^-1 * log((x/100))), 
##     data = fpeg)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -10.370   -3.312   -0.143    3.553   10.830  
## 
## Coefficients:
##                              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                     9.674      1.402     6.9  5.4e-10 ***
## I((x/100)^-1)                  10.050      0.747    13.4  < 2e-16 ***
## I((x/100)^-1 * log((x/100)))    3.138      0.233    13.5  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for gaussian family taken to be 23.56)
## 
##     Null deviance: 6576.8  on 99  degrees of freedom
## Residual deviance: 2285.3  on 97  degrees of freedom
## AIC: 604.7
## 
## Number of Fisher Scoring iterations: 2

## Create plot base for mfp
plotBaseMfp <- ggplot(data = fpeg,
       mapping = aes(x = x, y = y)) +
    layer(geom = "point") +
    scale_x_continuous(breaks = seq(from = 0, to = 100, by = 20)) +     
    scale_color_discrete(name = "model", breaks =  c("Intercept","Linear","Quadratic","Cubic","GAM","mfp")) +
    theme_bw() +
    theme(legend.key = element_blank())

## Create a mfp layer
layerMfp <- stat_fun(fun = PredFun, args = list(model = mfpOne),     mapping = aes(color = "mfp"))

## Show with splines
plotBaseMfp + layerSpIntercept + layerSpLinear + layerSpQuadratic + layerGam1 + layerMfp

plot of chunk unnamed-chunk-8


## Plot separately
plotMfp1 <- plotBaseMfp + layerMfp
grid.arrange(plotSpline2,plotSpline3,plotGam1, plotMfp1)

plot of chunk unnamed-chunk-8

FitLayer function for live demonstration

## Create plot base
plotBase <- ggplot(data = fpeg,
                      mapping = aes(x = x, y = y)) +
    layer(geom = "point") +
    scale_x_continuous(breaks = seq(from = 0, to = 100, by = 20)) +     
    scale_color_discrete(name = "model") +
    theme_bw() +
    theme(legend.key = element_blank())

## Define a "fit and plot" function
FitLayer <- function(formula, fitFun = lm, print = FALSE) {
    ## Fit model
    modelFit <- fitFun(formula = formula,
                       data    = fpeg)
    ## Print result
    if (print == TRUE) print(summary(modelFit))
    ## Return a layer
    stat_fun(fun = PredFun, args = list(model = modelFit), mapping = aes(x = x))
}
## Linear term only
plotBase + FitLayer(y ~ x)

plot of chunk unnamed-chunk-10

## Quadratic
plotBase + FitLayer(y ~ x + I(x^2))

plot of chunk unnamed-chunk-10

## Cubic
plotBase + FitLayer(y ~ x + I(x^2) + I(x^3))

plot of chunk unnamed-chunk-10

## Fractional
plotBase + FitLayer(y ~ x + I(x^2) + I(x^(1/2)))

plot of chunk unnamed-chunk-10

## Multiple Fractional Polynomial Model (Automatic)
plotBase + FitLayer(y ~ fp(x), mfp, print = T)
## 
## Call:
## glm(formula = y ~ I((x/100)^-1) + I((x/100)^-1 * log((x/100))), 
##     data = fpeg)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -10.370   -3.312   -0.143    3.553   10.830  
## 
## Coefficients:
##                              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                     9.674      1.402     6.9  5.4e-10 ***
## I((x/100)^-1)                  10.050      0.747    13.4  < 2e-16 ***
## I((x/100)^-1 * log((x/100)))    3.138      0.233    13.5  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for gaussian family taken to be 23.56)
## 
##     Null deviance: 6576.8  on 99  degrees of freedom
## Residual deviance: 2285.3  on 97  degrees of freedom
## AIC: 604.7
## 
## Number of Fisher Scoring iterations: 2

plot of chunk unnamed-chunk-10


## "Intercept" spline (category indicator)
plotBase + FitLayer(y ~ indicator)

plot of chunk unnamed-chunk-10

## Linear spline
plotBase + FitLayer(y ~ x + s2 + s3 + s4 + s5)

plot of chunk unnamed-chunk-10


## Quadratic spline (unrestricted)
plotBase + FitLayer(y ~ x + I(x^2) + I(s2^2) + I(s3^2) + I(s4^2) + I(s5^2))

plot of chunk unnamed-chunk-10

## Quadratic spline restricted with 1st segment linear restriction
plotBase + FitLayer(y ~ x +        + I(s2^2) + I(s3^2) + I(s4^2) + I(s5^2))

plot of chunk unnamed-chunk-10

## Quadratic spline restricted with 1st & last segment linear restriction
plotBase + FitLayer(y ~ x +        + I(s2^2 - s5^2) + I(s3^2 - s5^2) + I(s4^2 - s5^2))

plot of chunk unnamed-chunk-10


## Cubic spline (unrestricted)
plotBase + FitLayer(y ~ x + I(x^2) + I(x^3) + I(s2^3) + I(s3^3) + I(s4^3) + I(s5^3))

plot of chunk unnamed-chunk-10


## Generalized Additive Model (Nonparametric smoother)
plotBase + FitLayer(y ~ s(x), gam)

plot of chunk unnamed-chunk-10


## Spline with splines::bs(): Linear spline with 1 equally-sized segment (df-degree knots)
plotBase + FitLayer(y ~ bs(x, df = 1, knots = NULL, degree = 1))

plot of chunk unnamed-chunk-10

## Spline with splines::bs(): Linear spline with 2 equally-sized segment
plotBase + FitLayer(y ~ bs(x, df = 2, knots = NULL, degree = 1))

plot of chunk unnamed-chunk-10

## Spline with splines::bs(): Linear spline with 3 equally-sized segment
plotBase + FitLayer(y ~ bs(x, df = 3, knots = NULL, degree = 1))

plot of chunk unnamed-chunk-10

## Spline with splines::bs(): Linear spline with 4 equally-sized segment
plotBase + FitLayer(y ~ bs(x, df = 4, knots = NULL, degree = 1))

plot of chunk unnamed-chunk-10

## Spline with splines::bs(): Linear spline with 5 equally-sized segment
plotBase + FitLayer(y ~ bs(x, df = 5, knots = NULL, degree = 1))

plot of chunk unnamed-chunk-10

## Spline with splines::bs(): Linear spline with fixed boundaries
plotBase + FitLayer(y ~ bs(x, df = NULL, knots = c(20,40,60,80), degree = 1))

plot of chunk unnamed-chunk-10

## Spline with splines::bs(): Linear spline with 10 equally-sized segment
plotBase + FitLayer(y ~ bs(x, df = 10, knots = NULL, degree = 1))

plot of chunk unnamed-chunk-10

## Spline with splines::bs(): Linear spline with 20 equally-sized segment
plotBase + FitLayer(y ~ bs(x, df = 20, knots = NULL, degree = 1))

plot of chunk unnamed-chunk-10



## Spline with splines::bs(): Quadratic spline with 1 equally-sized segment (df-degree knots)
plotBase + FitLayer(y ~ bs(x, df = 2, knots = NULL, degree = 2))

plot of chunk unnamed-chunk-10

## Spline with splines::bs(): Quadratic spline with 2 equally-sized segment
plotBase + FitLayer(y ~ bs(x, df = 3, knots = NULL, degree = 2))

plot of chunk unnamed-chunk-10

## Spline with splines::bs(): Quadratic spline with 3 equally-sized segment
plotBase + FitLayer(y ~ bs(x, df = 4, knots = NULL, degree = 2))

plot of chunk unnamed-chunk-10

## Spline with splines::bs(): Quadratic spline with 4 equally-sized segment
plotBase + FitLayer(y ~ bs(x, df = 5, knots = NULL, degree = 2))

plot of chunk unnamed-chunk-10

## Spline with splines::bs(): Quadratic spline with 5 equally-sized segment
plotBase + FitLayer(y ~ bs(x, df = 6, knots = NULL, degree = 2))

plot of chunk unnamed-chunk-10

## Spline with splines::bs(): Quadratic spline with 10 equally-sized segment
plotBase + FitLayer(y ~ bs(x, df = 11, knots = NULL, degree = 2))

plot of chunk unnamed-chunk-10

## Spline with splines::bs(): Quadratic spline with 20 equally-sized segment
plotBase + FitLayer(y ~ bs(x, df = 21, knots = NULL, degree = 2))

plot of chunk unnamed-chunk-10