10/17/2017
par(ask=TRUE) opar <- par(no.readonly=TRUE) # Listing 8.1 - Simple linear regression fit <- lm(weight ~ height, data=women) summary(fit)
## ## Call: ## lm(formula = weight ~ height, data = women) ## ## Residuals: ## Min 1Q Median 3Q Max ## -1.7333 -1.1333 -0.3833 0.7417 3.1167 ## ## Coefficients: ## Estimate Std. Error t value Pr(>|t|) ## (Intercept) -87.51667 5.93694 -14.74 1.71e-09 *** ## height 3.45000 0.09114 37.85 1.09e-14 *** ## --- ## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 ## ## Residual standard error: 1.525 on 13 degrees of freedom ## Multiple R-squared: 0.991, Adjusted R-squared: 0.9903 ## F-statistic: 1433 on 1 and 13 DF, p-value: 1.091e-14
women$weight
## [1] 115 117 120 123 126 129 132 135 139 142 146 150 154 159 164
fitted(fit)
## 1 2 3 4 5 6 7 8 ## 112.5833 116.0333 119.4833 122.9333 126.3833 129.8333 133.2833 136.7333 ## 9 10 11 12 13 14 15 ## 140.1833 143.6333 147.0833 150.5333 153.9833 157.4333 160.8833
residuals(fit)
## 1 2 3 4 5 6 ## 2.41666667 0.96666667 0.51666667 0.06666667 -0.38333333 -0.83333333 ## 7 8 9 10 11 12 ## -1.28333333 -1.73333333 -1.18333333 -1.63333333 -1.08333333 -0.53333333 ## 13 14 15 ## 0.01666667 1.56666667 3.11666667
plot(women$height,women$weight,
main="Women Age 30-39",
xlab="Height (in inches)",
ylab="Weight (in pounds)")
# add the line of best fit
abline(fit)
# Listing 8.2 - Polynomial regression fit2 <- lm(weight ~ height + I(height^2), data=women) summary(fit2)
## ## Call: ## lm(formula = weight ~ height + I(height^2), data = women) ## ## Residuals: ## Min 1Q Median 3Q Max ## -0.50941 -0.29611 -0.00941 0.28615 0.59706 ## ## Coefficients: ## Estimate Std. Error t value Pr(>|t|) ## (Intercept) 261.87818 25.19677 10.393 2.36e-07 *** ## height -7.34832 0.77769 -9.449 6.58e-07 *** ## I(height^2) 0.08306 0.00598 13.891 9.32e-09 *** ## --- ## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 ## ## Residual standard error: 0.3841 on 12 degrees of freedom ## Multiple R-squared: 0.9995, Adjusted R-squared: 0.9994 ## F-statistic: 1.139e+04 on 2 and 12 DF, p-value: < 2.2e-16
plot(women$height,women$weight,
main="Women Age 30-39",
xlab="Height (in inches)",
ylab="Weight (in lbs)")
lines(women$height,fitted(fit2))
# Enhanced scatterplot for women data
library(car)
library(car)
scatterplot(weight ~ height, data=women,
spread=FALSE, smoother.args=list(lty=2), pch=19,
main="Women Age 30-39",
xlab="Height (inches)",
ylab="Weight (lbs.)")
# Listing 8.3 - Examining bivariate relationships
states <- as.data.frame(state.x77[,c("Murder", "Population",
"Illiteracy", "Income", "Frost")])
cor(states)
## Murder Population Illiteracy Income Frost ## Murder 1.0000000 0.3436428 0.7029752 -0.2300776 -0.5388834 ## Population 0.3436428 1.0000000 0.1076224 0.2082276 -0.3321525 ## Illiteracy 0.7029752 0.1076224 1.0000000 -0.4370752 -0.6719470 ## Income -0.2300776 0.2082276 -0.4370752 1.0000000 0.2262822 ## Frost -0.5388834 -0.3321525 -0.6719470 0.2262822 1.0000000
library(car)
scatterplotMatrix(states, spread=FALSE, smoother.args=list(lty=2),
main="Scatter Plot Matrix")
# Listing 8.4 - Multiple linear regression
states <- as.data.frame(state.x77[,c("Murder", "Population",
"Illiteracy", "Income", "Frost")])
fit <- lm(Murder ~ Population + Illiteracy + Income + Frost, data=states)
summary(fit)
## ## Call: ## lm(formula = Murder ~ Population + Illiteracy + Income + Frost, ## data = states) ## ## Residuals: ## Min 1Q Median 3Q Max ## -4.7960 -1.6495 -0.0811 1.4815 7.6210 ## ## Coefficients: ## Estimate Std. Error t value Pr(>|t|) ## (Intercept) 1.235e+00 3.866e+00 0.319 0.7510 ## Population 2.237e-04 9.052e-05 2.471 0.0173 * ## Illiteracy 4.143e+00 8.744e-01 4.738 2.19e-05 *** ## Income 6.442e-05 6.837e-04 0.094 0.9253 ## Frost 5.813e-04 1.005e-02 0.058 0.9541 ## --- ## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 ## ## Residual standard error: 2.535 on 45 degrees of freedom ## Multiple R-squared: 0.567, Adjusted R-squared: 0.5285 ## F-statistic: 14.73 on 4 and 45 DF, p-value: 9.133e-08
# Listing 8.5 - Mutiple linear regression with a significant interaction term fit <- lm(mpg ~ hp + wt + hp:wt, data=mtcars) summary(fit)
## ## Call: ## lm(formula = mpg ~ hp + wt + hp:wt, data = mtcars) ## ## Residuals: ## Min 1Q Median 3Q Max ## -3.0632 -1.6491 -0.7362 1.4211 4.5513 ## ## Coefficients: ## Estimate Std. Error t value Pr(>|t|) ## (Intercept) 49.80842 3.60516 13.816 5.01e-14 *** ## hp -0.12010 0.02470 -4.863 4.04e-05 *** ## wt -8.21662 1.26971 -6.471 5.20e-07 *** ## hp:wt 0.02785 0.00742 3.753 0.000811 *** ## --- ## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 ## ## Residual standard error: 2.153 on 28 degrees of freedom ## Multiple R-squared: 0.8848, Adjusted R-squared: 0.8724 ## F-statistic: 71.66 on 3 and 28 DF, p-value: 2.981e-13
library(effects)
## Loading required package: carData
## ## Attaching package: 'carData'
## The following objects are masked from 'package:car': ## ## Guyer, UN, Vocab
## lattice theme set by effectsTheme() ## See ?effectsTheme for details.
plot(effect("hp:wt", fit,, list(wt=c(2.2, 3.2, 4.2))), multiline=TRUE)
# simple regression diagnostics fit <- lm(weight ~ height, data=women) par(mfrow=c(2,2)) plot(fit)
newfit <- lm(weight ~ height + I(height^2), data=women) par(opar) par(mfrow=c(2,2)) plot(newfit)
par(opar) # basic regression diagnostics for states data opar <- par(no.readonly=TRUE) fit <- lm(weight ~ height, data=women) par(mfrow=c(2,2)) plot(fit)
par(opar) fit2 <- lm(weight ~ height + I(height^2), data=women) opar <- par(no.readonly=TRUE) par(mfrow=c(2,2)) plot(fit2)
par(opar)
# Assessing normality
library(car)
states <- as.data.frame(state.x77[,c("Murder", "Population",
"Illiteracy", "Income", "Frost")])
fit <- lm(Murder ~ Population + Illiteracy + Income + Frost, data=states)
qqPlot(fit, labels=row.names(states), id.method="identify",
simulate=TRUE, main="Q-Q Plot")