Greenland S. Dose-response and trend analysis in epidemiology: alternatives to categorical analysis. http://www.ncbi.nlm.nih.gov/pubmed/7548341
Fractional polynomials example data set: http://www.umass.edu/statdata/statdata/data/fpexample.txt
## Load ggplot2 for plotting
library(ggplot2)
## Load gridExtra for fine tuning
library(gridExtra)
## Load gam for GAM
library(gam)
## Load mfp for automated fractional polynomials
library(mfp)
NAME: Fractional polynomials example data set (FPEXAMPLE.DAT)
SIZE: 100 observations, 3 variables.
SOURCE: The data in the file fpexample.dat are used in the first example
in the paper Hosmer, D.W and Royston, P.R. (2003)
"Using Fractional Polynomials to Model Continuous Covariates in
Regression Analysis".
DESCRIPTIVE ABSTRACT:
These data are hypothetical and were computer generated to follow a (-1,-1)
fractional polynomial model.
LIST OF VARIABLES:
Variable Description
_____________________________________
ID Identification code
X Independent variable
Y Outcome variable
_____________________________________
## Load dataset
fpeg <- read.table("http://www.umass.edu/statdata/statdata/data/fpexample.dat", col.names = c("ID","X","Y"))
## Modify dataset
fpeg <- within(fpeg, {
## Change to lower cases
x <- X
y <- Y
## Category indicator variable
indicator <- cut(X, breaks = c(-Inf,20,40,60,80,Inf), labels = c("i1","i2","i3","i4","i5"))
## Variables for spline
s1 <- pmax(X - 0, 0)
s2 <- pmax(X - 20, 0)
s3 <- pmax(X - 40, 0)
s4 <- pmax(X - 60, 0)
s5 <- pmax(X - 80, 0)
})
## Plot raw data as a scatter plot
## pdf(file = "./20131216DoseResponseGraphs2.pdf", width = 7, height = 7)
ggplot(data = fpeg,
mapping = aes(x = x, y = y)) +
layer(geom = "point") +
scale_x_continuous(breaks = seq(from = 0, to = 100, by = 20)) +
theme_bw() +
theme(legend.key = element_blank())
## Define a helper function for ggplot2
stat_fun <- function(fun, args, mapping) {
stat_function(fun = fun, args = args, mapping = mapping, n = 1001, size = 1.5, alpha = 1)
}
## Define prediction function
PredFun <- function(x, model) {
## Category indicator variable
indicator <- cut(x, breaks = c(-Inf,20,40,60,80,Inf), labels = c("i1","i2","i3","i4","i5"))
## Variables for spline
s1 <- pmax(x - 0, 0)
s2 <- pmax(x - 20, 0)
s3 <- pmax(x - 40, 0)
s4 <- pmax(x - 60, 0)
s5 <- pmax(x - 80, 0)
## Predict
predict(object = model, newdata = data.frame(x = x,
indicator = indicator,
s1 = s1, s2 = s2, s3 = s3, s4 = s4, s5 = s5))
}
Higher order polynomials and fractional polynomials make the dose response relationship more flexible.
## Linear model
lmLinear <- lm(formula = y ~ x,
data = fpeg)
## Quadratic model
lmQuadratic <- lm(formula = y ~ x + I(x^2),
data = fpeg)
## Cubit model
lmCubic <- lm(formula = y ~ x + I(x^2) + I(x^3),
data = fpeg)
## Fractional polynomial model
lmFractional <- lm(formula = y ~ x + I(x^2) + I(x^(1/2)),
data = fpeg)
## Create plot base
plotBasePoly <- ggplot(data = fpeg,
mapping = aes(x = x, y = y)) +
layer(geom = "point") +
scale_x_continuous(breaks = seq(from = 0, to = 100, by = 20)) +
scale_color_discrete(name = "model", breaks = c("Linear","Quadratic","Cubic","Fractional")) +
theme_bw() +
theme(legend.key = element_blank())
## Create layers
layerPolyLinear <- stat_fun(fun = PredFun, args = list(model = lmLinear), mapping = aes(color = "Linear"))
layerPolyQuadratic <- stat_fun(fun = PredFun, args = list(model = lmQuadratic), mapping = aes(color = "Quadratic"))
layerPolyCubic <- stat_fun(fun = PredFun, args = list(model = lmCubic), mapping = aes(color = "Cubic"))
layerPolyFractional <- stat_fun(fun = PredFun, args = list(model = lmFractional), mapping = aes(color = "Fractional"))
## Plot together
plotBasePoly + layerPolyLinear + layerPolyQuadratic + layerPolyCubic + layerPolyFractional
## Plot separately
plotPoly1 <- plotBasePoly + layerPolyLinear
plotPoly2 <- plotBasePoly + layerPolyQuadratic
plotPoly3 <- plotBasePoly + layerPolyCubic
plotPoly4 <- plotBasePoly + layerPolyFractional
grid.arrange(plotPoly1,plotPoly2,plotPoly3,plotPoly4)
The “spline family” models fit category specific models. The simplest case is the category-specific intercept only models, i.e., category indicator model. Category specific slopes result in the linear spline model.
## Category indicator model
lmInterceptSpline <- lm(formula = y ~ indicator,
data = fpeg)
## Linear spline model
lmLinearSpline <- lm(formula = y ~ x + s2 + s3 + s4 + s5,
data = fpeg)
## Quadratic spline model
lmQuadraticSpline <- lm(formula = y ~ x + I(x^2) + I(s2^2) + I(s3^2) + I(s4^2) + I(s5^2),
data = fpeg)
## Cubic spline model
lmCubicSpline <- lm(formula = y ~ x + I(x^2) + I(x^3) + I(s2^3) + I(s3^3) + I(s4^3) + I(s5^3),
data = fpeg)
## Create plot base
plotBaseSpline <- ggplot(data = fpeg,
mapping = aes(x = x, y = y)) +
layer(geom = "point") +
scale_x_continuous(breaks = seq(from = 0, to = 100, by = 20)) +
scale_color_discrete(name = "model", breaks = c("Intercept","Linear","Quadratic","Cubic")) +
theme_bw() +
theme(legend.key = element_blank())
## Create layers
layerSpIntercept <- stat_fun(fun = PredFun, args = list(model = lmInterceptSpline), mapping = aes(color = "Intercept"))
layerSpLinear <- stat_fun(fun = PredFun, args = list(model = lmLinearSpline), mapping = aes(color = "Linear"))
layerSpQuadratic <- stat_fun(fun = PredFun, args = list(model = lmQuadraticSpline), mapping = aes(color = "Quadratic"))
layerSpCubic <- stat_fun(fun = PredFun, args = list(model = lmCubicSpline), mapping = aes(color = "Cubic"))
## Plot together
plotBaseSpline + layerSpIntercept + layerSpLinear + layerSpQuadratic + layerSpCubic
## Plot separately
plotSpline1 <- plotBaseSpline + layerSpIntercept
plotSpline2 <- plotBaseSpline + layerSpLinear
plotSpline3 <- plotBaseSpline + layerSpQuadratic
plotSpline4 <- plotBaseSpline + layerSpCubic
grid.arrange(plotSpline1,plotSpline2,plotSpline3,plotSpline4)
## dev.off()
## Fit GAM
gamOne <- gam(formula = y ~ s(x),
data = fpeg)
## Create plot base for GAM
plotBaseGam <- ggplot(data = fpeg,
mapping = aes(x = x, y = y)) +
layer(geom = "point") +
scale_x_continuous(breaks = seq(from = 0, to = 100, by = 20)) +
scale_color_discrete(name = "model", breaks = c("Intercept","Linear","Quadratic","Cubic","GAM")) +
theme_bw() +
theme(legend.key = element_blank())
## Create a GAM layer
layerGam1 <- stat_fun(fun = PredFun, args = list(model = gamOne), mapping = aes(color = "GAM"))
## Show with splines
plotBaseGam + layerSpIntercept + layerSpLinear + layerSpQuadratic + layerGam1
## Plot separately
plotGam1 <- plotBaseGam + layerGam1
grid.arrange(plotSpline1,plotSpline2,plotSpline3,plotGam1)
## Automatically fit fractional polynomials
mfpOne <- mfp(formula = y ~ fp(x, df = 4),
data = fpeg)
## Check model for transformation
summary(mfpOne)
##
## Call:
## glm(formula = y ~ I((x/100)^-1) + I((x/100)^-1 * log((x/100))),
## data = fpeg)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -10.370 -3.312 -0.143 3.553 10.830
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 9.674 1.402 6.9 5.4e-10 ***
## I((x/100)^-1) 10.050 0.747 13.4 < 2e-16 ***
## I((x/100)^-1 * log((x/100))) 3.138 0.233 13.5 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 23.56)
##
## Null deviance: 6576.8 on 99 degrees of freedom
## Residual deviance: 2285.3 on 97 degrees of freedom
## AIC: 604.7
##
## Number of Fisher Scoring iterations: 2
## Create plot base for mfp
plotBaseMfp <- ggplot(data = fpeg,
mapping = aes(x = x, y = y)) +
layer(geom = "point") +
scale_x_continuous(breaks = seq(from = 0, to = 100, by = 20)) +
scale_color_discrete(name = "model", breaks = c("Intercept","Linear","Quadratic","Cubic","GAM","mfp")) +
theme_bw() +
theme(legend.key = element_blank())
## Create a mfp layer
layerMfp <- stat_fun(fun = PredFun, args = list(model = mfpOne), mapping = aes(color = "mfp"))
## Show with splines
plotBaseMfp + layerSpIntercept + layerSpLinear + layerSpQuadratic + layerGam1 + layerMfp
## Plot separately
plotMfp1 <- plotBaseMfp + layerMfp
grid.arrange(plotSpline2,plotSpline3,plotGam1, plotMfp1)
## Create plot base
plotBase <- ggplot(data = fpeg,
mapping = aes(x = x, y = y)) +
layer(geom = "point") +
scale_x_continuous(breaks = seq(from = 0, to = 100, by = 20)) +
scale_color_discrete(name = "model") +
theme_bw() +
theme(legend.key = element_blank())
## Define a "fit and plot" function
FitLayer <- function(formula, fitFun = lm, print = FALSE) {
## Fit model
modelFit <- fitFun(formula = formula,
data = fpeg)
## Print result
if (print == TRUE) print(summary(modelFit))
## Return a layer
stat_fun(fun = PredFun, args = list(model = modelFit), mapping = aes(x = x))
}
## Linear term only
plotBase + FitLayer(y ~ x)
## Quadratic
plotBase + FitLayer(y ~ x + I(x^2))
## Cubic
plotBase + FitLayer(y ~ x + I(x^2) + I(x^3))
## Fractional
plotBase + FitLayer(y ~ x + I(x^2) + I(x^(1/2)))
## Multiple Fractional Polynomial Model (Automatic)
plotBase + FitLayer(y ~ fp(x), mfp, print = T)
##
## Call:
## glm(formula = y ~ I((x/100)^-1) + I((x/100)^-1 * log((x/100))),
## data = fpeg)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -10.370 -3.312 -0.143 3.553 10.830
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 9.674 1.402 6.9 5.4e-10 ***
## I((x/100)^-1) 10.050 0.747 13.4 < 2e-16 ***
## I((x/100)^-1 * log((x/100))) 3.138 0.233 13.5 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 23.56)
##
## Null deviance: 6576.8 on 99 degrees of freedom
## Residual deviance: 2285.3 on 97 degrees of freedom
## AIC: 604.7
##
## Number of Fisher Scoring iterations: 2
## "Intercept" spline (category indicator)
plotBase + FitLayer(y ~ indicator)
## Linear spline
plotBase + FitLayer(y ~ x + s2 + s3 + s4 + s5)
## Quadratic spline (unrestricted)
plotBase + FitLayer(y ~ x + I(x^2) + I(s2^2) + I(s3^2) + I(s4^2) + I(s5^2))
## Quadratic spline restricted with 1st segment linear restriction
plotBase + FitLayer(y ~ x + + I(s2^2) + I(s3^2) + I(s4^2) + I(s5^2))
## Quadratic spline restricted with 1st & last segment linear restriction
plotBase + FitLayer(y ~ x + + I(s2^2 - s5^2) + I(s3^2 - s5^2) + I(s4^2 - s5^2))
## Cubic spline (unrestricted)
plotBase + FitLayer(y ~ x + I(x^2) + I(x^3) + I(s2^3) + I(s3^3) + I(s4^3) + I(s5^3))
## Generalized Additive Model (Nonparametric smoother)
plotBase + FitLayer(y ~ s(x), gam)
## Spline with splines::bs(): Linear spline with 1 equally-sized segment (df-degree knots)
plotBase + FitLayer(y ~ bs(x, df = 1, knots = NULL, degree = 1))
## Spline with splines::bs(): Linear spline with 2 equally-sized segment
plotBase + FitLayer(y ~ bs(x, df = 2, knots = NULL, degree = 1))
## Spline with splines::bs(): Linear spline with 3 equally-sized segment
plotBase + FitLayer(y ~ bs(x, df = 3, knots = NULL, degree = 1))
## Spline with splines::bs(): Linear spline with 4 equally-sized segment
plotBase + FitLayer(y ~ bs(x, df = 4, knots = NULL, degree = 1))
## Spline with splines::bs(): Linear spline with 5 equally-sized segment
plotBase + FitLayer(y ~ bs(x, df = 5, knots = NULL, degree = 1))
## Spline with splines::bs(): Linear spline with fixed boundaries
plotBase + FitLayer(y ~ bs(x, df = NULL, knots = c(20,40,60,80), degree = 1))
## Spline with splines::bs(): Linear spline with 10 equally-sized segment
plotBase + FitLayer(y ~ bs(x, df = 10, knots = NULL, degree = 1))
## Spline with splines::bs(): Linear spline with 20 equally-sized segment
plotBase + FitLayer(y ~ bs(x, df = 20, knots = NULL, degree = 1))
## Spline with splines::bs(): Quadratic spline with 1 equally-sized segment (df-degree knots)
plotBase + FitLayer(y ~ bs(x, df = 2, knots = NULL, degree = 2))
## Spline with splines::bs(): Quadratic spline with 2 equally-sized segment
plotBase + FitLayer(y ~ bs(x, df = 3, knots = NULL, degree = 2))
## Spline with splines::bs(): Quadratic spline with 3 equally-sized segment
plotBase + FitLayer(y ~ bs(x, df = 4, knots = NULL, degree = 2))
## Spline with splines::bs(): Quadratic spline with 4 equally-sized segment
plotBase + FitLayer(y ~ bs(x, df = 5, knots = NULL, degree = 2))
## Spline with splines::bs(): Quadratic spline with 5 equally-sized segment
plotBase + FitLayer(y ~ bs(x, df = 6, knots = NULL, degree = 2))
## Spline with splines::bs(): Quadratic spline with 10 equally-sized segment
plotBase + FitLayer(y ~ bs(x, df = 11, knots = NULL, degree = 2))
## Spline with splines::bs(): Quadratic spline with 20 equally-sized segment
plotBase + FitLayer(y ~ bs(x, df = 21, knots = NULL, degree = 2))