YouTube “Multiple Regression” videos
Home Price Data
# Numeric codes standing in for the four regions. They are used to fill the
# `region` column below, so the resulting factor levels are "0".."3".
E <- 0
N <- 1
S <- 2
W <- 3

# Fifteen home records, one row per home:
#   price   - numeric response used in the regression further down
#   sqft    - numeric size predictor
#   exempHS - 0/1 factor (presumably an exemplary-high-school flag; confirm
#             against the data source)
#   region  - factor built from the E/N/S/W codes defined above
home_prices <- data.frame(
  price = c(
    450, 398, 412, 307, 289, 310, 245, 260,
    290, 332, 377, 322, 383, 404, 371
  ),
  sqft = c(
    3860, 3787, 3681, 3643, 3601, 3485, 3401, 3312,
    3213, 3207, 3124, 3109, 3076, 3073, 3052
  ),
  exempHS = factor(c(1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1)),
  region = factor(c(S, N, S, N, E, S, W, N, W, S, S, S, E, S, S))
)

# Show the assembled data frame.
home_prices
## price sqft exempHS region
## 1 450 3860 1 2
## 2 398 3787 1 1
## 3 412 3681 1 2
## 4 307 3643 1 1
## 5 289 3601 1 0
## 6 310 3485 1 2
## 7 245 3401 1 3
## 8 260 3312 0 1
## 9 290 3213 1 3
## 10 332 3207 1 2
## 11 377 3124 1 2
## 12 322 3109 1 2
## 13 383 3076 1 0
## 14 404 3073 1 2
## 15 371 3052 1 2
Scatter plot of price vs. sqft, colored by exemplary high school status (exempHS)
library(ggplot2)
# Scatter plot of price (y) against sqft (x), coloured by the exempHS factor.
# geom_smooth(method = lm) overlays a straight-line fit for each colour group;
# se = FALSE turns off the confidence band.
g <- ggplot(home_prices, aes(x = sqft, y = price, color = exempHS)) +
  geom_point() +
  geom_smooth(method = lm, se = FALSE)

# Render the plot.
g
Scatter plot of price vs. sqft, colored by region
# Same scatter plot of price (y) against sqft (x), now coloured by the region
# factor, with a straight-line fit per colour group and no confidence band.
g <- ggplot(home_prices, aes(x = sqft, y = price, color = region)) +
  geom_point() +
  geom_smooth(method = lm, se = FALSE)

# Render the plot.
g
Regression Model
# Regress price on every other column of home_prices (the `.` on the
# right-hand side expands to sqft, exempHS and region). The two factor
# columns enter the model as dummy variables against their first level.
fit <- lm(formula = price ~ ., data = home_prices)

# Print the coefficient table and overall fit statistics.
summary(fit)
##
## Call:
## lm(formula = price ~ ., data = home_prices)
##
## Residuals:
## Min 1Q Median 3Q Max
## -68.062 -39.267 8.557 33.834 58.411
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 137.61239 184.77308 0.745 0.475
## sqft 0.03607 0.05756 0.627 0.546
## exempHS1 77.96322 68.77479 1.134 0.286
## region1 2.91912 57.13357 0.051 0.960
## region2 36.77754 41.80086 0.880 0.402
## region3 -67.36375 52.89474 -1.274 0.235
##
## Residual standard error: 52.86 on 9 degrees of freedom
## Multiple R-squared: 0.5131, Adjusted R-squared: 0.2427
## F-statistic: 1.897 on 5 and 9 DF, p-value: 0.1904