env <- new.env() # Create an environment to store data sets
load("../data.rda", envir = env) # Load the data sets into this environment
df <- get("CH01TA01", envir = env) # Assign a data set to an R object
names(df) <- c("x", "y") # Give useful, albeit arbitrary, names to our variables
xx <- scale(df[, 'x'], TRUE, FALSE) # Center the x values
yy <- scale(df[, 'y'], TRUE, FALSE) # Center the y values
# Create a table for our output
tab <- transform(df,
x = x,
y = y,
xdif = xx,
ydif = yy,
crp = round(xx * yy),
sqdevx = xx^2,
sqdevy = round(yy^2))
# Append summary rows to our table
tab <- rbind(
tab,
Total = round(apply(tab, 2, sum), 0),
Mean = c(colMeans(df), rep("", 5)))
print(tab, quote = FALSE)
## x y xdif ydif crp sqdevx sqdevy
## 1 80 399 10 86.72 867 100 7520
## 2 30 121 -40 -191.28 7651 1600 36588
## 3 50 221 -20 -91.28 1826 400 8332
## 4 90 376 20 63.72 1274 400 4060
## 5 70 361 0 48.72 0 0 2374
## 6 60 224 -10 -88.28 883 100 7793
## 7 120 546 50 233.72 11686 2500 54625
## 8 80 352 10 39.72 397 100 1578
## 9 100 353 30 40.72 1222 900 1658
## 10 50 157 -20 -155.28 3106 400 24112
## 11 40 160 -30 -152.28 4568 900 23189
## 12 70 252 0 -60.28 0 0 3634
## 13 90 389 20 76.72 1534 400 5886
## 14 20 113 -50 -199.28 9964 2500 39713
## 15 110 435 40 122.72 4909 1600 15060
## 16 100 420 30 107.72 3232 900 11604
## 17 30 212 -40 -100.28 4011 1600 10056
## 18 50 268 -20 -44.28 886 400 1961
## 19 90 377 20 64.72 1294 400 4189
## 20 110 421 40 108.72 4349 1600 11820
## 21 30 273 -40 -39.28 1571 1600 1543
## 22 90 468 20 155.72 3114 400 24249
## 23 40 244 -30 -68.28 2048 900 4662
## 24 80 342 10 29.72 297 100 883
## 25 70 323 0 10.72 0 0 115
## Total 1750 7807 0 0 70689 19800 307204
## Mean 70 312.28
fit <- lm(y ~ x, data = df) # Fit the linear regression model
summary(fit) # Output summary information for this model
##
## Call:
## lm(formula = y ~ x, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -83.88 -34.09 -5.98 38.83 103.53
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 62.366 26.177 2.38 0.026 *
## x 3.570 0.347 10.29 4.4e-10 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 48.8 on 23 degrees of freedom
## Multiple R-squared: 0.822, Adjusted R-squared: 0.814
## F-statistic: 106 on 1 and 23 DF, p-value: 4.45e-10
plot(y ~ x, df, xlab = "Lot Size", ylab = "Hours", pch = 20)
title("(a) Scatter Plot") # Add a title to the current device plot
# notice the different ways by which to issue plot commands
with(df, plot(x, y, xlab = "Lot Size", ylab = "Hours", pch = 20))
title("(b) Fitted Regression Line") # Add a title to the current device plot
abline(fit) # Add a trend line for this fitted model
Notice the use of “accessor” methods fitted and resid. They are the recommended way of obtaining fitted values and residuals instead of direct named list element access such as fit[["residuals"]]
tab <- cbind(
"Lot Size (X)" = df$x,
"Work Hours (Y)" = df$y,
"Est. Mean Response" = round(fitted(fit), 2),
"Residuals" = round( resid(fit), 2),
"Sq.Residuals" = round( resid(fit)^2, 1))
rbind(tab, Totals = colSums(tab))
## Lot Size (X) Work Hours (Y) Est. Mean Response Residuals Sq.Residuals
## 1 80 399 348.0 51.02 2602.8
## 2 30 121 169.5 -48.47 2349.5
## 3 50 221 240.9 -19.88 395.1
## 4 90 376 383.7 -7.68 59.0
## 5 70 361 312.3 48.72 2373.6
## 6 60 224 276.6 -52.58 2764.4
## 7 120 546 490.8 55.21 3048.1
## 8 80 352 348.0 4.02 16.1
## 9 100 353 419.4 -66.39 4407.1
## 10 50 157 240.9 -83.88 7035.2
## 11 40 160 205.2 -45.17 2040.7
## 12 70 252 312.3 -60.28 3633.7
## 13 90 389 383.7 5.32 28.3
## 14 20 113 133.8 -20.77 431.4
## 15 110 435 455.1 -20.09 403.5
## 16 100 420 419.4 0.61 0.4
## 17 30 212 169.5 42.53 1808.6
## 18 50 268 240.9 27.12 735.7
## 19 90 377 383.7 -6.68 44.7
## 20 110 421 455.1 -34.09 1162.0
## 21 30 273 169.5 103.53 10718.1
## 22 90 468 383.7 84.32 7109.2
## 23 40 244 205.2 38.83 1507.5
## 24 80 342 348.0 -5.98 35.8
## 25 70 323 312.3 10.72 114.9
## Totals 1750 7807 7807.0 0.01 54825.4