YouTube “Multiple Regression” videos
Home Price Data
# Build the 15-row home price data set used throughout this document.
#   price   - sale price (units not stated here; presumably thousands of
#             dollars -- TODO confirm)
#   sqft    - floor area in square feet
#   exempHS - 0/1 indicator stored as a factor with levels "0" and "1"
#             (presumably a homestead-exemption flag -- TODO confirm)
home_prices <- data.frame(
  price = c(
    145, 69.9, 315, 144.9, 134.9,
    369, 95, 228.9, 149, 295,
    388.5, 75, 130, 174, 334.9
  ),
  sqft = c(
    1872, 1954, 4104, 1524, 1297,
    3278, 1192, 2252, 1620, 2466,
    3188, 1061, 1195, 1552, 2901
  ),
  exempHS = factor(c(
    0, 0, 1, 0, 0,
    1, 0, 1, 0, 1,
    1, 0, 0, 1, 1
  ))
)

# Print the data frame so the raw values are visible in the rendered output.
home_prices
## price sqft exempHS
## 1 145.0 1872 0
## 2 69.9 1954 0
## 3 315.0 4104 1
## 4 144.9 1524 0
## 5 134.9 1297 0
## 6 369.0 3278 1
## 7 95.0 1192 0
## 8 228.9 2252 1
## 9 149.0 1620 0
## 10 295.0 2466 1
## 11 388.5 3188 1
## 12 75.0 1061 0
## 13 130.0 1195 0
## 14 174.0 1552 1
## 15 334.9 2901 1
Plotting sqft vs price
library(ggplot2)
# Scatter plot of price (y) against sqft (x), with points colored by the
# exempHS factor. Written with ggplot() + geom_point() rather than qplot():
# qplot() is deprecated in current ggplot2 releases, and this form produces
# the same point plot with the same default axis labels and legend.
ggplot(home_prices, aes(x = sqft, y = price, color = exempHS)) +
  geom_point()
Regression analysis
# Fit a linear model of price on every other column of home_prices
# (the `.` on the right-hand side expands to sqft + exempHS).
fit <- lm(formula = price ~ ., data = home_prices)

# Show residual quantiles, the coefficient table, and overall fit statistics.
summary(fit)
##
## Call:
## lm(formula = price ~ ., data = home_prices)
##
## Residuals:
## Min 1Q Median 3Q Max
## -78.45 -27.26 16.22 28.04 64.91
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 27.07494 33.68996 0.804 0.4372
## sqft 0.06207 0.02032 3.054 0.0100 *
## exempHS1 98.64787 35.96319 2.743 0.0178 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 44.65 on 12 degrees of freedom
## Multiple R-squared: 0.857, Adjusted R-squared: 0.8331
## F-statistic: 35.94 on 2 and 12 DF, p-value: 8.569e-06
Graphing regression lines
# Scatter plot of price against sqft, colored by exempHS, with a straight
# least-squares line overlaid and no confidence band (se = FALSE).
# Because color maps to a factor, geom_smooth() fits one line per exempHS
# level; these are separate per-group regressions, not the single
# common-slope model stored in `fit`.
g <- ggplot(home_prices, aes(x = sqft, y = price, color = exempHS)) +
  geom_point() +
  geom_smooth(method = lm, se = FALSE)

# Render the plot.
g