env <- new.env()
load("../data.rda", envir = env)
df <- get("CH06FI05", envir = env)
names(df) <- c("x1", "x2", "y")
fit <- lm(y ~ x1 + x2, df)
with(df, pairs(data.frame("SALES" = y, "TARGETPOP" = x1, "DISPOINC" = x2)))
with(df, cor (data.frame("SALES" = y, "TARGETPOP" = x1, "DISPOINC" = x2)))
## SALES TARGETPOP DISPOINC
## SALES 1.0000 0.9446 0.8358
## TARGETPOP 0.9446 1.0000 0.7813
## DISPOINC 0.8358 0.7813 1.0000
cbind(
"TARGETPOP (X1)" = df$x1,
"DISPOINC (X2)" = df$x2,
"SALES (Y)" = df$y,
"FITTED" = round(fitted(fit),2),
"RESIDUAL" = round(resid(fit), 2))
## TARGETPOP (X1) DISPOINC (X2) SALES (Y) FITTED RESIDUAL
## 1 68.5 16.7 174.4 187.2 -12.78
## 2 45.2 16.8 164.4 154.2 10.17
## 3 91.3 18.2 244.2 234.4 9.80
## 4 47.8 16.3 154.6 153.3 1.27
## 5 46.9 17.3 181.6 161.4 20.22
## 6 66.1 18.2 207.5 197.7 9.76
## 7 49.5 15.9 152.8 152.1 0.74
## 8 52.0 17.2 163.2 167.9 -4.67
## 9 48.9 16.6 145.4 157.7 -12.34
## 10 38.4 16.0 137.2 136.8 0.35
## 11 87.9 18.3 241.9 230.4 11.51
## 12 72.8 17.1 191.1 197.2 -6.08
## 13 88.4 17.4 232.0 222.7 9.31
## 14 42.9 15.8 145.3 141.5 3.78
## 15 52.5 17.8 161.1 174.2 -13.11
## 16 85.7 18.4 209.7 228.1 -18.42
## 17 41.3 16.5 146.4 145.8 0.65
## 18 51.7 16.3 144.0 159.0 -15.00
## 19 89.6 18.1 232.6 231.0 1.61
## 20 82.7 19.1 224.1 230.3 -6.22
## 21 52.3 16.0 166.5 157.1 9.44
summary(fit)
##
## Call:
## lm(formula = y ~ x1 + x2, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -18.424 -6.216 0.745 9.436 20.215
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -68.857 60.017 -1.15 0.266
## x1 1.455 0.212 6.87 2e-06 ***
## x2 9.366 4.064 2.30 0.033 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 11 on 18 degrees of freedom
## Multiple R-squared: 0.917, Adjusted R-squared: 0.907
## F-statistic: 99.1 on 2 and 18 DF, p-value: 1.92e-10
anova(fit)
## Analysis of Variance Table
##
## Response: y
## Df Sum Sq Mean Sq F value Pr(>F)
## x1 1 23372 23372 192.90 4.6e-11 ***
## x2 1 643 643 5.31 0.033 *
## Residuals 18 2181 121
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Since lattice is a popular library, I decided to use cloud for this task. There are other libaries such as scatterplot3d, rgl and Rcmdr that include their own methods.
To rotate the cloud we use the screen parameter that takes a list of the position angles of the coordinate axes. The default screen value is
screen = list(z = 40, x = -60, y = 0)
By eyeballing what the author has done, the choice is made to rotate along the z axis to z = -30. The scales parameter deals with the axes values and most commonly the arrows = FALSE option is utilized so that numeric values are listed instead of the default pointed arrows along the axis.
library(lattice)
cloud(y ~ x1 + x2, df, scales = list(arrows = FALSE),
xlab = "TARGETPOP", ylab = "DISPOINC", zlab = "SALES",
main = "(a) Before Spinning")
cloud(y ~ x1 + x2, df, scales = list(arrows = FALSE),
xlab = "TARGETPOP", ylab = "DISPOINC", zlab = "SALES",
main = "(b) After Spinning", screen = list(z = -40, x = -60, y = 0))
The approach below uses a base R graphics function persp used in 3D plotting. The approach is simple in that you create 2 sorted sequences, one for each independent variable, both of equal length. You then expand a grid of all combinations of these sequences and use the linear model to predict the values for each of these combinations to create a new sequence. These 3 sequences are then input to persp to define how to draw the 3D plot along the dimensions of those sequences.
lm3d <- function(model, res, ...) {
ivs <- labels(terms(model))
x <- model.frame(model)[, ivs[1]] # 1st independent variable
y <- model.frame(model)[, ivs[2]] # 2nd independent variable
xr <- seq(min(x), max(x), len = res) # equidistant sequence from range of 1st iv
yr <- seq(min(y), max(y), len = res) # equidistant sequence from range of 2nd iv
# Create a list object of these sequences to be turned into a grid
newdata <- list(xr, yr)
names(newdata) <- ivs
newdata <- do.call('expand.grid', newdata)
zr <- matrix(predict(model, newdata), res, res)
persp(xr, yr, zr, ...)
}
# The resolution ('res') parameter controls how 'fine' the plane is
lm3d(fit, res = 30,
ticktype = "detailed", shade = 0.5, expand = 2/3,
xlab = "TARGETPOP", ylab = "DISPOINC", zlab = "SALES",
theta = 310, phi = 30)
While this provides a static view of fitted plane, it is possible to 'animate' a rotation with a little trickery. The interested reader should do something like the following.
# Change "len = 24" lower or higher for larger or smaller rotations
for (t in seq(0, 360, len = 24))
{
lm3d(fit, 25, ..., theta = t) # change phi for z-rotation
Sys.sleep(0.1) # How many seconds to delay before rotating
}
par(mfrow = c(2, 2), pch = 19)
plot(resid(fit) ~ fitted(fit), xlab = "Fitted", ylab = "Residual")
title("(a) Residual Plot against Y")
plot(resid(fit) ~ x1, df, xlab = "Targtpop", ylab = "Residual")
title("(b) Residual Plot against X1")
plot(resid(fit) ~ x2, df, xlab = "Dispoinc", ylab = "Residual")
title("(c) Residual Plot against X2")
plot(resid(fit) ~ I(x1 * x2), df, xlab = "X1X2", ylab = "Residual")
title("(d) Residual Plot against X1X2")
par(mfrow = c(1, 2), pch = 19)
plot(abs(resid(fit)) ~ fitted(fit), xlab = "Fitted", ylab = "Absresid")
title("(a) Plot of Absolute Residuals against Y")
qqnorm(resid(fit), main = "", xlab = "Expected", ylab = "Residual")
qqline(resid(fit))
title("(b) Normal Probability Plot")
confint(fit)
## 2.5 % 97.5 %
## (Intercept) -194.9480 57.234
## x1 1.0096 1.899
## x2 0.8274 17.904
predict(fit, data.frame(x1 = 65.4, x2 = 17.6), interval = "confidence")
## fit lwr upr
## 1 191.1 185.3 196.9
The ci.sim from Ch. 4 handles this multivariate case without change.
ci.sim <- function(model, newdata, type = c("B", "S"), alpha = 0.05)
{
g <- nrow(newdata)
CI <- predict(model, newdata, se.fit = TRUE)
M <- ifelse(match.arg(type) == "B",
qt(1 - alpha / (2*g), model$df), # B = (4.9a)
sqrt(g * qf(1 - alpha, g, model$df))) # S = (4.8a)
spred <- sqrt( CI$residual.scale^2 + (CI$se.fit)^2 ) # (2.38)
x <- data.frame(
"x" = newdata,
"spred" = spred,
"fit" = CI$fit,
"lower" = CI$fit - M * spred,
"upper" = CI$fit + M * spred)
return(x)
}
newdata <- data.frame( x1 = c(65.4, 53.1), x2 = c(17.6, 17.7) )
ci.sim(fit, newdata, "B", 0.1) # Bonferroni Prediction
## x.x1 x.x2 spred fit lower upper
## 1 65.4 17.6 11.35 191.1 167.3 214.9
## 2 53.1 17.7 11.93 174.1 149.1 199.2
ci.sim(fit, newdata, "S", 0.1) # Scheffe Prediction
## x.x1 x.x2 spred fit lower upper
## 1 65.4 17.6 11.35 191.1 165.1 217.1
## 2 53.1 17.7 11.93 174.1 146.8 201.5