References
Load packages
library(magrittr)
library(splines)
library(MASS)
library(ggplot2)
library(reshape2)
Plot
data(mcycle)
baseplot1 <- ggplot(data = mcycle, mapping = aes(x = times, y = accel)) +
layer(geom = "point") +
theme_bw() + theme(legend.key = element_blank())
baseplot1

Create four knots
knots <- c(15,20,32,40)
mcycle$X1 <- pmax(0, mcycle$times - knots[1])
mcycle$X2 <- pmax(0, mcycle$times - knots[2])
mcycle$X3 <- pmax(0, mcycle$times - knots[3])
mcycle$X4 <- pmax(0, mcycle$times - knots[4])
mcycle
## times accel X1 X2 X3 X4
## 1 2.4 0.0 0.0 0.0 0.0 0.0
## 2 2.6 -1.3 0.0 0.0 0.0 0.0
## 3 3.2 -2.7 0.0 0.0 0.0 0.0
## 4 3.6 0.0 0.0 0.0 0.0 0.0
## 5 4.0 -2.7 0.0 0.0 0.0 0.0
## 6 6.2 -2.7 0.0 0.0 0.0 0.0
## 7 6.6 -2.7 0.0 0.0 0.0 0.0
## 8 6.8 -1.3 0.0 0.0 0.0 0.0
## 9 7.8 -2.7 0.0 0.0 0.0 0.0
## 10 8.2 -2.7 0.0 0.0 0.0 0.0
## 11 8.8 -1.3 0.0 0.0 0.0 0.0
## 12 8.8 -2.7 0.0 0.0 0.0 0.0
## 13 9.6 -2.7 0.0 0.0 0.0 0.0
## 14 10.0 -2.7 0.0 0.0 0.0 0.0
## 15 10.2 -5.4 0.0 0.0 0.0 0.0
## 16 10.6 -2.7 0.0 0.0 0.0 0.0
## 17 11.0 -5.4 0.0 0.0 0.0 0.0
## 18 11.4 0.0 0.0 0.0 0.0 0.0
## 19 13.2 -2.7 0.0 0.0 0.0 0.0
## 20 13.6 -2.7 0.0 0.0 0.0 0.0
## 21 13.8 0.0 0.0 0.0 0.0 0.0
## 22 14.6 -13.3 0.0 0.0 0.0 0.0
## 23 14.6 -5.4 0.0 0.0 0.0 0.0
## 24 14.6 -5.4 0.0 0.0 0.0 0.0
## 25 14.6 -9.3 0.0 0.0 0.0 0.0
## 26 14.6 -16.0 0.0 0.0 0.0 0.0
## 27 14.6 -22.8 0.0 0.0 0.0 0.0
## 28 14.8 -2.7 0.0 0.0 0.0 0.0
## 29 15.4 -22.8 0.4 0.0 0.0 0.0
## 30 15.4 -32.1 0.4 0.0 0.0 0.0
## 31 15.4 -53.5 0.4 0.0 0.0 0.0
## 32 15.4 -54.9 0.4 0.0 0.0 0.0
## 33 15.6 -40.2 0.6 0.0 0.0 0.0
## 34 15.6 -21.5 0.6 0.0 0.0 0.0
## 35 15.8 -21.5 0.8 0.0 0.0 0.0
## 36 15.8 -50.8 0.8 0.0 0.0 0.0
## 37 16.0 -42.9 1.0 0.0 0.0 0.0
## 38 16.0 -26.8 1.0 0.0 0.0 0.0
## 39 16.2 -21.5 1.2 0.0 0.0 0.0
## 40 16.2 -50.8 1.2 0.0 0.0 0.0
## 41 16.2 -61.7 1.2 0.0 0.0 0.0
## 42 16.4 -5.4 1.4 0.0 0.0 0.0
## 43 16.4 -80.4 1.4 0.0 0.0 0.0
## 44 16.6 -59.0 1.6 0.0 0.0 0.0
## 45 16.8 -71.0 1.8 0.0 0.0 0.0
## 46 16.8 -91.1 1.8 0.0 0.0 0.0
## 47 16.8 -77.7 1.8 0.0 0.0 0.0
## 48 17.6 -37.5 2.6 0.0 0.0 0.0
## 49 17.6 -85.6 2.6 0.0 0.0 0.0
## 50 17.6 -123.1 2.6 0.0 0.0 0.0
## 51 17.6 -101.9 2.6 0.0 0.0 0.0
## 52 17.8 -99.1 2.8 0.0 0.0 0.0
## 53 17.8 -104.4 2.8 0.0 0.0 0.0
## 54 18.6 -112.5 3.6 0.0 0.0 0.0
## 55 18.6 -50.8 3.6 0.0 0.0 0.0
## 56 19.2 -123.1 4.2 0.0 0.0 0.0
## 57 19.4 -85.6 4.4 0.0 0.0 0.0
## 58 19.4 -72.3 4.4 0.0 0.0 0.0
## 59 19.6 -127.2 4.6 0.0 0.0 0.0
## 60 20.2 -123.1 5.2 0.2 0.0 0.0
## 61 20.4 -117.9 5.4 0.4 0.0 0.0
## 62 21.2 -134.0 6.2 1.2 0.0 0.0
## 63 21.4 -101.9 6.4 1.4 0.0 0.0
## 64 21.8 -108.4 6.8 1.8 0.0 0.0
## 65 22.0 -123.1 7.0 2.0 0.0 0.0
## 66 23.2 -123.1 8.2 3.2 0.0 0.0
## 67 23.4 -128.5 8.4 3.4 0.0 0.0
## 68 24.0 -112.5 9.0 4.0 0.0 0.0
## 69 24.2 -95.1 9.2 4.2 0.0 0.0
## 70 24.2 -81.8 9.2 4.2 0.0 0.0
## 71 24.6 -53.5 9.6 4.6 0.0 0.0
## 72 25.0 -64.4 10.0 5.0 0.0 0.0
## 73 25.0 -57.6 10.0 5.0 0.0 0.0
## 74 25.4 -72.3 10.4 5.4 0.0 0.0
## 75 25.4 -44.3 10.4 5.4 0.0 0.0
## 76 25.6 -26.8 10.6 5.6 0.0 0.0
## 77 26.0 -5.4 11.0 6.0 0.0 0.0
## 78 26.2 -107.1 11.2 6.2 0.0 0.0
## 79 26.2 -21.5 11.2 6.2 0.0 0.0
## 80 26.4 -65.6 11.4 6.4 0.0 0.0
## 81 27.0 -16.0 12.0 7.0 0.0 0.0
## 82 27.2 -45.6 12.2 7.2 0.0 0.0
## 83 27.2 -24.2 12.2 7.2 0.0 0.0
## 84 27.2 9.5 12.2 7.2 0.0 0.0
## 85 27.6 4.0 12.6 7.6 0.0 0.0
## 86 28.2 12.0 13.2 8.2 0.0 0.0
## 87 28.4 -21.5 13.4 8.4 0.0 0.0
## 88 28.4 37.5 13.4 8.4 0.0 0.0
## 89 28.6 46.9 13.6 8.6 0.0 0.0
## 90 29.4 -17.4 14.4 9.4 0.0 0.0
## 91 30.2 36.2 15.2 10.2 0.0 0.0
## 92 31.0 75.0 16.0 11.0 0.0 0.0
## 93 31.2 8.1 16.2 11.2 0.0 0.0
## 94 32.0 54.9 17.0 12.0 0.0 0.0
## 95 32.0 48.2 17.0 12.0 0.0 0.0
## 96 32.8 46.9 17.8 12.8 0.8 0.0
## 97 33.4 16.0 18.4 13.4 1.4 0.0
## 98 33.8 45.6 18.8 13.8 1.8 0.0
## 99 34.4 1.3 19.4 14.4 2.4 0.0
## 100 34.8 75.0 19.8 14.8 2.8 0.0
## 101 35.2 -16.0 20.2 15.2 3.2 0.0
## 102 35.2 -54.9 20.2 15.2 3.2 0.0
## 103 35.4 69.6 20.4 15.4 3.4 0.0
## 104 35.6 34.8 20.6 15.6 3.6 0.0
## 105 35.6 32.1 20.6 15.6 3.6 0.0
## 106 36.2 -37.5 21.2 16.2 4.2 0.0
## 107 36.2 22.8 21.2 16.2 4.2 0.0
## 108 38.0 46.9 23.0 18.0 6.0 0.0
## 109 38.0 10.7 23.0 18.0 6.0 0.0
## 110 39.2 5.4 24.2 19.2 7.2 0.0
## 111 39.4 -1.3 24.4 19.4 7.4 0.0
## 112 40.0 -21.5 25.0 20.0 8.0 0.0
## 113 40.4 -13.3 25.4 20.4 8.4 0.4
## 114 41.6 30.8 26.6 21.6 9.6 1.6
## 115 41.6 -10.7 26.6 21.6 9.6 1.6
## 116 42.4 29.4 27.4 22.4 10.4 2.4
## 117 42.8 0.0 27.8 22.8 10.8 2.8
## 118 42.8 -10.7 27.8 22.8 10.8 2.8
## 119 43.0 14.7 28.0 23.0 11.0 3.0
## 120 44.0 -1.3 29.0 24.0 12.0 4.0
## 121 44.4 0.0 29.4 24.4 12.4 4.4
## 122 45.0 10.7 30.0 25.0 13.0 5.0
## 123 46.6 10.7 31.6 26.6 14.6 6.6
## 124 47.8 -26.8 32.8 27.8 15.8 7.8
## 125 47.8 -14.7 32.8 27.8 15.8 7.8
## 126 48.8 -13.3 33.8 28.8 16.8 8.8
## 127 50.6 0.0 35.6 30.6 18.6 10.6
## 128 52.0 10.7 37.0 32.0 20.0 12.0
## 129 53.2 -14.7 38.2 33.2 21.2 13.2
## 130 55.0 -2.7 40.0 35.0 23.0 15.0
## 131 55.0 10.7 40.0 35.0 23.0 15.0
## 132 55.4 -2.7 40.4 35.4 23.4 15.4
## 133 57.6 10.7 42.6 37.6 25.6 17.6
Linear splines
lsp <- lm(accel ~ times + X1 + X2 + X3 + X4, data = mcycle)
summary(lsp)
##
## Call:
## lm(formula = accel ~ times + X1 + X2 + X3 + X4, data = mcycle)
##
## Residuals:
## Min 1Q Median 3Q Max
## -82.740 -12.593 1.321 10.996 52.514
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.4528 10.7829 0.784 0.43455
## times -1.6286 0.8911 -1.828 0.06996 .
## X1 -22.6321 2.3485 -9.637 < 2e-16 ***
## X2 39.7390 2.4738 16.064 < 2e-16 ***
## X3 -21.9223 1.9785 -11.080 < 2e-16 ***
## X4 6.7149 1.9103 3.515 0.00061 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 23.52 on 127 degrees of freedom
## Multiple R-squared: 0.772, Adjusted R-squared: 0.763
## F-statistic: 86.01 on 5 and 127 DF, p-value: < 2.2e-16
newdat <- data.frame(times = seq(0,60,0.01))
newdat$X1 <- pmax(0, newdat$times - knots[1])
newdat$X2 <- pmax(0, newdat$times - knots[2])
newdat$X3 <- pmax(0, newdat$times - knots[3])
newdat$X4 <- pmax(0, newdat$times - knots[4])
newdat$linear <- predict(lsp, newdata = newdat)
Quadratic splines
qsp <- lm(accel ~ times + I(times^2) + I(X1^2) + I(X2^2) + I(X3^2) + I(X4^2), data = mcycle)
summary(qsp)
##
## Call:
## lm(formula = accel ~ times + I(times^2) + I(X1^2) + I(X2^2) +
## I(X3^2) + I(X4^2), data = mcycle)
##
## Residuals:
## Min 1Q Median 3Q Max
## -87.815 -20.748 0.032 21.613 76.130
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -72.8237 26.4683 -2.751 0.00681 **
## times 22.9006 5.6727 4.037 0.0000933658 ***
## I(times^2) -1.3995 0.2653 -5.274 0.0000005625 ***
## I(X1^2) 3.7175 0.6258 5.941 0.0000000259 ***
## I(X2^2) -2.0250 0.4977 -4.069 0.0000826801 ***
## I(X3^2) -1.5047 0.3416 -4.405 0.0000223542 ***
## I(X4^2) 1.6797 0.3587 4.683 0.0000071942 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 31.13 on 126 degrees of freedom
## Multiple R-squared: 0.6039, Adjusted R-squared: 0.585
## F-statistic: 32.02 on 6 and 126 DF, p-value: < 2.2e-16
newdat$quadratic <- predict(qsp, newdata = newdat)
Cubic splines
csp <- lm(accel ~ times + I(times^2) + I(times^3) + I(X1^3) + I(X2^3) + I(X3^3) + I(X4^3), data = mcycle)
summary(csp)
##
## Call:
## lm(formula = accel ~ times + I(times^2) + I(times^3) + I(X1^3) +
## I(X2^3) + I(X3^3) + I(X4^3), data = mcycle)
##
## Residuals:
## Min 1Q Median 3Q Max
## -93.458 -13.221 -0.257 13.418 49.473
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 25.93411 32.92156 0.788 0.43233
## times -15.20812 11.31370 -1.344 0.18131
## I(times^2) 2.29557 1.10198 2.083 0.03928 *
## I(times^3) -0.10146 0.03226 -3.145 0.00208 **
## I(X1^3) 0.50733 0.07129 7.116 7.63e-11 ***
## I(X2^3) -0.58777 0.05460 -10.765 < 2e-16 ***
## I(X3^3) 0.34484 0.03304 10.437 < 2e-16 ***
## I(X4^3) -0.21077 0.03528 -5.974 2.24e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 24.23 on 125 degrees of freedom
## Multiple R-squared: 0.7618, Adjusted R-squared: 0.7485
## F-statistic: 57.13 on 7 and 125 DF, p-value: < 2.2e-16
newdat$cubic <- predict(csp, newdata = newdat)
Plot splines
newdatMelt <- melt(data = newdat,
id.vars = c("times",paste0("X",1:4)),
variable.name = "spline",
value.name = "value")
baseplot1 +
layer(geom = "line", data = newdatMelt,
mapping = aes(x = times, y = value, color = spline),
size = 1) +
facet_wrap( ~ spline, ncol = 1)
