## Load datasets package
library(datasets)
## Load ggplot2 for plotting
library(ggplot2)
## Load gridExtra for fine tuning
library(gridExtra)
Description:
Waiting time between eruptions and the duration of the eruption
for the Old Faithful geyser in Yellowstone National Park, Wyoming,
USA.
Format:
A data frame with 272 observations on 2 variables.
[,1] eruptions numeric Eruption time in mins
[,2] waiting numeric Waiting time to next eruption (in mins)
## Load
data(faithful)
## Modify dataset
faithful <- within(faithful, {
## Rename for convenience
x <- eruptions
y <- waiting
## Category indicator variable
indicator <- cut(x, breaks = c(-Inf,2.5,3.5,4.5,Inf), labels = c("i1","i2","i3","i4"))
## Variables for spline
s1 <- pmax(x - 1.5, 0)
s2 <- pmax(x - 2.5, 0)
s3 <- pmax(x - 3.5, 0)
s4 <- pmax(x - 4.5, 0)
})
## Plot raw data as a scatter plot
ggplot(data = faithful,
mapping = aes(x = x, y = y)) +
layer(geom = "point") +
theme_bw() +
theme(legend.key = element_blank())
## Define a helper function for ggplot2
stat_fun <- function(fun, args, mapping) {
stat_function(fun = fun, args = args, mapping = mapping, n = 1001, size = 1.5, alpha = 1)
}
## Define prediction function
PredFun <- function(x, model) {
## Category indicator variable
indicator <- cut(x, breaks = c(-Inf,2.5,3.5,4.5,Inf), labels = c("i1","i2","i3","i4"))
## Variables for spline
s1 <- pmax(x - 1.5, 0)
s2 <- pmax(x - 2.5, 0)
s3 <- pmax(x - 3.5, 0)
s4 <- pmax(x - 4.5, 0)
## Predict
predict(object = model, newdata = data.frame(x = x, indicator = indicator, s1 = s1, s2 = s2, s3 = s3, s4 = s4))
}
Higher order polynomials and fractional polynomials make the dose response relationship more flexible.
## Linear model
lmLinear <- lm(formula = y ~ x,
data = faithful)
## Quadratic model
lmQuadratic <- lm(formula = y ~ x + I(x^2),
data = faithful)
## Cubit model
lmCubic <- lm(formula = y ~ x + I(x^2) + I(x^3),
data = faithful)
## Fractional polynomial model
lmFractional <- lm(formula = y ~ x + I(x^2) + I(x^(1/2)),
data = faithful)
## Create plot base
plotBasePoly <- ggplot(data = faithful,
mapping = aes(x = x, y = y)) +
layer(geom = "point") +
scale_color_discrete(name = "model", breaks = c("Linear","Quadratic","Cubic","Fractional")) +
theme_bw() +
theme(legend.key = element_blank())
## Create layers
layerPolyLinear <- stat_fun(fun = PredFun, args = list(model = lmLinear), mapping = aes(color = "Linear"))
layerPolyQuadratic <- stat_fun(fun = PredFun, args = list(model = lmQuadratic), mapping = aes(color = "Quadratic"))
layerPolyCubic <- stat_fun(fun = PredFun, args = list(model = lmCubic), mapping = aes(color = "Cubic"))
layerPolyFractional <- stat_fun(fun = PredFun, args = list(model = lmFractional), mapping = aes(color = "Fractional"))
## Plot together
plotBasePoly + layerPolyLinear + layerPolyQuadratic + layerPolyCubic + layerPolyFractional
## Plot separately
plotPoly1 <- plotBasePoly + layerPolyLinear
plotPoly2 <- plotBasePoly + layerPolyQuadratic
plotPoly3 <- plotBasePoly + layerPolyCubic
plotPoly4 <- plotBasePoly + layerPolyFractional
grid.arrange(plotPoly1,plotPoly2,plotPoly3,plotPoly4)
The “spline family” models fit category specific models. The simplest case is the category-specific intercept only models, i.e., category indicator model. Category specific slopes result in the linear spline model.
## Category indicator model
lmInterceptSpline <- lm(formula = y ~ indicator,
data = faithful)
## Linear spline model
lmLinearSpline <- lm(formula = y ~ x + s2 + s3 + s4,
data = faithful)
## Quadratic spline model
lmQuadraticSpline <- lm(formula = y ~ x + I(x^2) + I(s2^2) + I(s3^2) + I(s3^2),
data = faithful)
## Cubic spline model
lmCubicSpline <- lm(formula = y ~ x + I(x^2) + I(x^3) + I(s2^3) + I(s3^3) + I(s3^3),
data = faithful)
## Create plot base
plotBaseSpline <- ggplot(data = faithful,
mapping = aes(x = x, y = y)) +
layer(geom = "point") +
scale_color_discrete(name = "model", breaks = c("Intercept","Linear","Quadratic","Cubic")) +
theme_bw() +
theme(legend.key = element_blank())
## Create layers
layerSpIntercept <- stat_fun(fun = PredFun, args = list(model = lmInterceptSpline), mapping = aes(color = "Intercept"))
layerSpLinear <- stat_fun(fun = PredFun, args = list(model = lmLinearSpline), mapping = aes(color = "Linear"))
layerSpQuadratic <- stat_fun(fun = PredFun, args = list(model = lmQuadraticSpline), mapping = aes(color = "Quadratic"))
layerSpCubic <- stat_fun(fun = PredFun, args = list(model = lmCubicSpline), mapping = aes(color = "Cubic"))
## Plot together
plotBaseSpline + layerSpIntercept + layerSpLinear + layerSpQuadratic + layerSpCubic
## Plot separately
plotSpline1 <- plotBaseSpline + layerSpIntercept
plotSpline2 <- plotBaseSpline + layerSpLinear
plotSpline3 <- plotBaseSpline + layerSpQuadratic
plotSpline4 <- plotBaseSpline + layerSpCubic
grid.arrange(plotSpline1,plotSpline2,plotSpline3,plotSpline4)